Librerías

library(cluster)
library(broom)
library(glmnet)
library(modelr)
library(ggplot2)
library(cowplot)

library(mongolite)
library(ggmap)
library(dplyr)
library(sp)
library(lubridate)
library(tidyr)
library(reshape2)
library(stringr)
# barriosCategoricos = inner_join(mcuadrado,analisis_Barrios , by="barrio")%>%select(barrio, mCuadradoC, pVentasC)
# preciosModelo = inner_join(precios,barriosCategoricos , by="barrio")
# preciosModelo = inner_join(preciosModelo,productos , by=c("producto"="id"))
# preciosModelo = select (preciosModelo,-c(fecha,nombre,presentacion,producto))
# preciosModelo
# 
# 
# write.csv(preciosModelo,"/home/ignacio/datos/facultad/repos/tpEspecializacion/data/preciosModelo.csv", row.names = TRUE)
# Load the modelling dataset and drop the columns X, barrio, sucursal and
# sucursalTipo.
# NOTE(review): the path is absolute and machine-specific; consider making it
# relative to the project root.
preciosModelo <- read.csv(
  file = "/home/ignacio/datos/facultad/repos/tpEspecializacion/data/preciosModelo.csv"
)
preciosModelo <- select(preciosModelo, -c(X, barrio, sucursal, sucursalTipo))

# resample_partition() assigns rows at random. Fix the RNG seed so that the
# subsample, the train/test split and every model fitted on them can be
# reproduced from one run to the next.
set.seed(42)

# Keep a random 30% of the rows as the working dataset.
preciosModelo <- preciosModelo %>%
  resample_partition(c(train = 0.3, test = 0.7))
preciosModelo <- preciosModelo$train %>%
  as_tibble()

# Split the working dataset into 70% train / 30% test.
train_test <- preciosModelo %>%
  resample_partition(c(train = 0.7, test = 0.3))
precios_train <- train_test$train %>%
  as_tibble()
precios_test <- train_test$test %>%
  as_tibble()

Modelos lineales simples para explicar el precio en función de la bandera del supermercado y de la medición

# banderaDescripcion + medicion + barrio + banderaDescripcion + pVentasC + mCuadradoC
# Single-predictor linear models of price: one on the supermarket banner,
# one on the measurement variable.
lm_precio2bandera <- lm(precio ~ banderaDescripcion, data = preciosModelo)
lm_precio2medicion <- lm(precio ~ medicion, data = preciosModelo)
#lm_precio2barrio = lm(formula = precio~barrio, data=precios)

Analisis de precio por Barrio

# Full coefficient summaries of the two single-predictor fits, followed by
# their one-row model statistics from broom::glance().
summary(lm_precio2bandera)
#coef(lm_precio2bandera)
summary(lm_precio2medicion)
glance(lm_precio2bandera)
glance(lm_precio2medicion)

# Candidate predictors considered:
# banderaDescripcion + sucursalTipo + medicion + pVentasC + mCuadradoC
# Multiple regression of price on banner, measurement, pVentasC, mCuadradoC
# and brand.
lm_precioMultiple <- lm(
  precio ~ banderaDescripcion + medicion + pVentasC + mCuadradoC + marca,
  data = preciosModelo
)
# medicion - barrio - sucursalTipo - banderaDescripcion
summary(lm_precioMultiple)

Call:
lm(formula = precio ~ banderaDescripcion + medicion + pVentasC + 
    mCuadradoC + marca, data = preciosModelo)

Residuals:
    Min      1Q  Median      3Q     Max 
-219.91  -12.45   -1.09    8.85  368.69 

Coefficients:
                                           Estimate Std. Error  t value Pr(>|t|)    
(Intercept)                               198.29125    1.72366  115.041  < 2e-16 ***
banderaDescripcionDisco                     4.60671    0.17144   26.871  < 2e-16 ***
banderaDescripcionExpress                  -3.20800    0.26391  -12.156  < 2e-16 ***
banderaDescripcionHipermercado Carrefour   -4.65928    0.30709  -15.172  < 2e-16 ***
banderaDescripcionJOSIMAR SUPERMERCADOS    -4.84252    0.77112   -6.280 3.39e-10 ***
banderaDescripcionJumbo                     3.94146    0.31916   12.349  < 2e-16 ***
banderaDescripcionMarket                   -4.13624    0.16506  -25.060  < 2e-16 ***
banderaDescripcionMi Changomas             -3.40764    0.94764   -3.596 0.000323 ***
banderaDescripcionSupermercados DIA        -5.07030    0.25715  -19.717  < 2e-16 ***
banderaDescripcionVea                       1.22593    0.24227    5.060 4.19e-07 ***
medicion                                    1.01076    0.01980   51.061  < 2e-16 ***
pVentasCbajo                                0.30080    0.24994    1.204 0.228783    
pVentasCmedio                               0.03088    0.16613    0.186 0.852526    
mCuadradoCbajo                             -0.10130    0.17685   -0.573 0.566771    
mCuadradoCmedio                            -0.32164    0.13485   -2.385 0.017069 *  
marca7 UP                                -137.89813    2.13015  -64.736  < 2e-16 ***
marca9 DE ORO                            -176.15951    1.90340  -92.550  < 2e-16 ***
marcaACTIMEL                              -56.10958    2.56182  -21.902  < 2e-16 ***
marcaADES                                -157.01825    1.95050  -80.502  < 2e-16 ***
marcaƁGUILA                              -122.27633    2.00493  -60.988  < 2e-16 ***
marcaAIMƉ                                 -36.19008    2.13883  -16.920  < 2e-16 ***
marcaALA                                  -86.13896    1.80748  -47.657  < 2e-16 ***
marcaALELUYA                             -122.94738    2.52903  -48.614  < 2e-16 ***
marcaALICANTE                            -159.60632    2.15442  -74.083  < 2e-16 ***
marcaALMA MORA                            -39.21732    2.11090  -18.578  < 2e-16 ***
marcaALWAYS                              -125.75331    2.54611  -49.390  < 2e-16 ***
marcaAMANDA                               -85.41617    2.52269  -33.859  < 2e-16 ***
marcaAMOR                                -131.68104    2.51658  -52.325  < 2e-16 ***
marcaAQUARIUS                            -153.17470    1.80415  -84.901  < 2e-16 ***
marcaARCOR                               -155.80267    1.75248  -88.904  < 2e-16 ***
marcaARIEL                                 81.35897    2.01726   40.332  < 2e-16 ***
marcaARLISTƁN                             -44.67356    2.12490  -21.024  < 2e-16 ***
marcaARMONƍA                             -180.77299    2.14961  -84.096  < 2e-16 ***
marcaARROCITAS                           -154.38957    2.15024  -71.801  < 2e-16 ***
marcaAXE                                 -112.42388    1.84790  -60.839  < 2e-16 ***
marcaAYUDƍN                              -148.65242    1.84940  -80.379  < 2e-16 ***
marcaBAGLEY                              -135.94366    2.09560  -64.871  < 2e-16 ***
marcaBC                                  -184.89126    1.94033  -95.288  < 2e-16 ***
marcaBENJAMƍN                             -60.54702    2.16285  -27.994  < 2e-16 ***
marcaBIMBO                               -139.66088    2.51049  -55.631  < 2e-16 ***
marcaBLEM                                 -57.94081    1.93725  -29.909  < 2e-16 ***
marcaBODEGA TRAPICHE                     -114.10991    2.49483  -45.739  < 2e-16 ***
marcaBOLS                                 -67.61167    2.52276  -26.801  < 2e-16 ***
marcaBON O BON                           -179.64268    2.41561  -74.367  < 2e-16 ***
marcaBONAFIDE                            -128.92564    2.53535  -50.851  < 2e-16 ***
marcaBONAQUA                             -168.96695    2.13885  -78.999  < 2e-16 ***
marcaBRAHMA                              -135.59944    2.45823  -55.161  < 2e-16 ***
marcaBRANCA                                 8.54327    2.10148    4.065 4.80e-05 ***
marcaBUDWEISER                           -126.91840    2.51453  -50.474  < 2e-16 ***
marcaBUTTER TOFFEES                      -143.31532    2.12909  -67.313  < 2e-16 ***
marcaCABRALES                             -73.28866    2.12961  -34.414  < 2e-16 ***
marcaCACHAMAI                            -161.68245    2.52071  -64.142  < 2e-16 ***
marcaCALLIA                               -84.20334    2.00989  -41.895  < 2e-16 ***
marcaCAMPARI                               50.51196    2.50850   20.136  < 2e-16 ***
marcaCAƑUELAS                            -142.65323    2.00803  -71.041  < 2e-16 ***
marcaCAPITƁN MORGAN                       102.39305    3.33682   30.686  < 2e-16 ***
marcaCAREFREE                             -60.18091    1.87353  -32.122  < 2e-16 ***
marcaCARIOCA                             -188.36207    2.52901  -74.481  < 2e-16 ***
marcaCARREFOUR                           -118.23961    4.05825  -29.136  < 2e-16 ***
marcaCASANCREM                           -124.71248    1.91978  -64.962  < 2e-16 ***
marcaCASANTO                             -168.56967    2.42498  -69.514  < 2e-16 ***
marcaCASTEL                              -102.23429    2.49491  -40.977  < 2e-16 ***
marcaCASTELL                             -157.07325    1.94522  -80.748  < 2e-16 ***
marcaCAT CHOW                            -127.55390    2.49290  -51.167  < 2e-16 ***
marcaCBSE                                -110.47936    1.99569  -55.359  < 2e-16 ***
marcaCELUSAL                             -160.93824    1.92997  -83.389  < 2e-16 ***
marcaCEPITA                              -167.91045    1.81950  -92.284  < 2e-16 ***
marcaCERAMICOL                           -120.77981    2.16286  -55.843  < 2e-16 ***
marcaCEREAL MIX                           -96.04001    1.93604  -49.606  < 2e-16 ***
marcaCEREALITAS                          -134.30273    2.13444  -62.922  < 2e-16 ***
marcaCHANDON                               96.53849    2.15924   44.709  < 2e-16 ***
marcaCHOCOLINAS                          -161.08396    2.11630  -76.116  < 2e-16 ***
marcaCIF                                 -146.03608    1.77885  -82.096  < 2e-16 ***
marcaCINDOR                              -139.09480    2.12910  -65.330  < 2e-16 ***
marcaCINZANO                              -68.77240    2.47241  -27.816  < 2e-16 ***
marcaCITRIC                              -134.54692    2.11981  -63.471  < 2e-16 ***
marcaCLIGHT                              -193.82313    1.80967 -107.104  < 2e-16 ***
marcaCOCA COLA                           -138.00163    1.83247  -75.309  < 2e-16 ***
marcaCOCINERO                             -39.01233    1.90576  -20.471  < 2e-16 ***
marcaCOLGATE                             -122.66165    1.80778  -67.852  < 2e-16 ***
marcaCOLON                               -103.46981    2.11987  -48.810  < 2e-16 ***
marcaCOMFORT                             -113.59488    2.43464  -46.658  < 2e-16 ***
marcaCOQUITAS                            -170.01757    2.49481  -68.148  < 2e-16 ***
marcaCORONA                              -136.36139    2.47973  -54.990  < 2e-16 ***
marcaCOTO                                -125.78605    3.90863  -32.182  < 2e-16 ***
marcaCRIOLLITAS                          -147.93901    1.93605  -76.413  < 2e-16 ***
marcaCRUSH                               -160.80440    2.13017  -75.489  < 2e-16 ***
marcaCRUZ DE MALTA                       -109.13862    2.13828  -51.040  < 2e-16 ***
marcaCUSENIER                             -78.36174    1.95090  -40.167  < 2e-16 ***
marcaCUTEX                               -142.90525    2.57105  -55.582  < 2e-16 ***
marcaDADA                                 -42.10277    1.99790  -21.074  < 2e-16 ***
marcaDANETTE                             -155.47588    1.91992  -80.980  < 2e-16 ***
marcaDƁNICA                              -156.74175    2.49100  -62.923  < 2e-16 ***
marcaDANONINO                            -166.55299    1.92515  -86.514  < 2e-16 ***
marcaDƍA                                 -148.22982    6.56156  -22.591  < 2e-16 ***
marcaDOG CHOW                             -22.62106    2.00883  -11.261  < 2e-16 ***
marcaDON DAVID                             32.32714    2.52897   12.783  < 2e-16 ***
marcaDON SATUR                           -177.90658    1.99812  -89.037  < 2e-16 ***
marcaDON VICENTE                         -128.51714    1.93983  -66.252  < 2e-16 ***
marcaDOS ANCLAS                          -162.10630    1.79408  -90.356  < 2e-16 ***
marcaDOVE                                -118.41401    1.82868  -64.754  < 2e-16 ***
marcaDR LEMON                            -137.69946    2.15977  -63.756  < 2e-16 ***
marcaDRIVE                                 68.67541    2.58766   26.540  < 2e-16 ***
marcaECHO                                -142.14056    2.49289  -57.018  < 2e-16 ***
marcaECO DE LOS ANDES                    -174.09288    2.59746  -67.024  < 2e-16 ***
marcaELEMENTOS                            -52.51147    2.16848  -24.216  < 2e-16 ***
marcaELITE                               -143.92018    2.01646  -71.373  < 2e-16 ***
marcaESTANCIA MENDOZA                    -113.25004    2.11482  -53.551  < 2e-16 ***
marcaESTRELLA                            -124.42743    1.95253  -63.726  < 2e-16 ***
marcaETCHART                             -107.60114    2.51867  -42.721  < 2e-16 ***
marcaEXPRESS                             -146.87548    1.96714  -74.664  < 2e-16 ***
marcaEXQUISITA                           -160.16688    1.78223  -89.869  < 2e-16 ***
marcaFANTA                               -135.06308    1.89945  -71.106  < 2e-16 ***
marcaFAVORITA                            -174.94957    2.16163  -80.934  < 2e-16 ***
marcaFINCA EL PORTILLO                    -51.03729    2.00489  -25.456  < 2e-16 ***
marcaFINCA FLICHMAN                       -78.09996    2.53325  -30.830  < 2e-16 ***
marcaFINCA LAS MORAS                      -55.15308    2.13175  -25.872  < 2e-16 ***
marcaFINCA NATALINA                       -42.45296    2.51863  -16.856  < 2e-16 ***
marcaFINLANDIA                           -128.87324    1.84705  -69.772  < 2e-16 ***
marcaFOND DE CAVE                          29.50987    2.49291   11.838  < 2e-16 ***
marcaFORMIS                              -138.27945    2.47614  -55.845  < 2e-16 ***
marcaFRIZEE                              -127.40379    2.11679  -60.187  < 2e-16 ***
marcaFRUTIGRAN                           -151.79374    2.48907  -60.984  < 2e-16 ***
marcaFUYƍ                                -152.73717    2.01341  -75.860  < 2e-16 ***
marcaGALLO                               -111.91774    1.93297  -57.899  < 2e-16 ***
marcaGALLO SNACKS                        -151.69041    2.43956  -62.179  < 2e-16 ***
marcaGANCIA                               -72.65155    2.14104  -33.933  < 2e-16 ***
marcaGATORADE                            -157.29202    1.84912  -85.063  < 2e-16 ***
marcaGENSER                               -86.08933    2.13994  -40.230  < 2e-16 ***
marcaGIACOMO                              -95.78526    2.01047  -47.643  < 2e-16 ***
marcaGILLETTE                             -63.45704    1.90251  -33.354  < 2e-16 ***
marcaGLACIAR                             -163.83023    2.12232  -77.194  < 2e-16 ***
marcaGOMES DA COSTA                      -103.38454    2.12085  -48.747  < 2e-16 ***
marcaGRANBY                              -145.54428    2.45826  -59.206  < 2e-16 ***
marcaGRANIX                              -151.62736    1.78494  -84.948  < 2e-16 ***
marcaGRANJA DEL SOL                       -95.07063    1.82919  -51.974  < 2e-16 ***
marcaGREEN HILLS                         -148.49983    1.99238  -74.534  < 2e-16 ***
marcaH2OH!                               -148.24895    2.01285  -73.651  < 2e-16 ***
marcaHARPIC                              -110.74843    2.11879  -52.270  < 2e-16 ***
marcaHEINEKEN                            -102.23365    2.42819  -42.103  < 2e-16 ***
marcaHELLMANN'S                          -151.34624    1.84201  -82.163  < 2e-16 ***
marcaHERBAL ESSENCES                     -113.96433    2.16044  -52.751  < 2e-16 ***
marcaHEREFORD                            -142.01379    2.10615  -67.428  < 2e-16 ***
marcaHIGIENOL                            -118.01849    1.95190  -60.463  < 2e-16 ***
marcaHILERET                             -125.01425    1.88701  -66.250  < 2e-16 ***
marcaHINDS                                -91.65300    2.53746  -36.120  < 2e-16 ***
marcaHIRAM WALKER                         -37.06543    2.49486  -14.857  < 2e-16 ***
marcaHOGAREƑAS                           -173.30418    2.51859  -68.810  < 2e-16 ***
marcaHUGGIES                              -58.68584    1.93997  -30.251  < 2e-16 ***
marcaIGUANA                              -145.84941    2.52894  -57.672  < 2e-16 ***
marcaIMPERIAL                            -123.19073    2.53536  -48.589  < 2e-16 ***
marcaISENBECK                            -146.54397    2.55056  -57.456  < 2e-16 ***
marcaJ&B                                  389.79078    2.47246  157.653  < 2e-16 ***
marcaJOHNSON'S                            -68.94229    2.02267  -34.085  < 2e-16 ***
marcaJORGITO                             -138.07653    2.14672  -64.320  < 2e-16 ***
marcaKELLOGGS                             -80.86496    2.44456  -33.080  < 2e-16 ***
marcaKESITAS                             -167.40015    2.13940  -78.246  < 2e-16 ***
marcaKILLKA                                 8.69983    2.50260    3.476 0.000508 ***
marcaKIN                                 -156.34305    2.13230  -73.321  < 2e-16 ***
marcaKINDER                              -156.73171    2.19765  -71.318  < 2e-16 ***
marcaKNORR                               -163.47065    1.77837  -91.922  < 2e-16 ***
marcaKNORR QUICK                         -140.59278    2.15795  -65.151  < 2e-16 ***
marcaKOLYNOS                             -145.50512    2.48345  -58.590  < 2e-16 ***
marcaKOTEX                                -55.20017    1.90273  -29.011  < 2e-16 ***
marcaKRACHITOS                           -156.46095    1.93709  -80.771  < 2e-16 ***
marcaLA CAMPAGNOLA                       -138.64847    1.79305  -77.325  < 2e-16 ***
marcaLA MERCED                            -90.69662    2.48534  -36.493  < 2e-16 ***
marcaLA MORENITA                          -94.37262    2.15740  -43.744  < 2e-16 ***
marcaLA SALTEƑA                          -132.52001    1.89987  -69.752  < 2e-16 ***
marcaLA SERENƍSIMA                       -141.27571    1.75074  -80.695  < 2e-16 ***
marcaLA TRANQUERA                        -132.79727    2.01178  -66.010  < 2e-16 ***
marcaLA VIRGINIA                         -131.47425    1.86642  -70.442  < 2e-16 ***
marcaLACTAL                              -159.35071    2.13883  -74.504  < 2e-16 ***
marcaLATITUD 33                           -40.81538    2.00995  -20.307  < 2e-16 ***
marcaLAYS                                 -87.27208    2.44963  -35.627  < 2e-16 ***
marcaLEVITƉ                              -139.50420    1.83795  -75.902  < 2e-16 ***
marcaLINCOLN                             -173.16190    2.11831  -81.745  < 2e-16 ***
marcaLORD CHESELINE                      -124.46094    2.47605  -50.266  < 2e-16 ***
marcaLOS ƁRBOLES                          -65.96835    2.13121  -30.953  < 2e-16 ***
marcaLUCCHETTI                           -142.43240    1.77092  -80.428  < 2e-16 ***
marcaLYSOFORM                            -123.89572    1.84381  -67.195  < 2e-16 ***
marcaMAGGI                               -158.01542    2.13940  -73.860  < 2e-16 ***
marcaMAGISTRAL                           -130.95631    1.93397  -67.714  < 2e-16 ***
marcaMAIZENA                             -152.26310    2.15435  -70.677  < 2e-16 ***
marcaMANƁ                                -161.49117    2.00415  -80.578  < 2e-16 ***
marcaMARUCHAN                            -155.42111    2.18226  -71.220  < 2e-16 ***
marcaMATARAZZO                           -149.53859    1.78577  -83.739  < 2e-16 ***
marcaMAYOLIVA                            -164.35469    2.51866  -65.255  < 2e-16 ***
marcaMAZOLA                               -91.77531    2.49482  -36.786  < 2e-16 ***
marcaMC CAIN                              -89.00577    2.49102  -35.731  < 2e-16 ***
marcaMEDIA TARDE                         -165.29177    2.48531  -66.507  < 2e-16 ***
marcaMELBA                               -174.06801    2.47425  -70.352  < 2e-16 ***
marcaMELITAS                             -171.94959    2.55058  -67.416  < 2e-16 ***
marcaMELLIZAS                            -153.13601    2.14675  -71.334  < 2e-16 ***
marcaMENDICRIM                           -130.64518    2.14910  -60.791  < 2e-16 ***
marcaMENOYO                              -161.96846    1.92914  -83.959  < 2e-16 ***
marcaMERENGADAS                          -155.65945    2.12079  -73.397  < 2e-16 ***
marcaMICHEL TORINO                       -152.47342    2.13124  -71.542  < 2e-16 ***
marcaMILLER                              -103.51983    2.49677  -41.461  < 2e-16 ***
marcaMINERVA                             -124.18554    2.51451  -49.388  < 2e-16 ***
 [ reached getOption("max.print") -- omitted 107 rows ]
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 33.49 on 343659 degrees of freedom
Multiple R-squared:  0.7404,    Adjusted R-squared:  0.7402 
F-statistic:  3203 on 306 and 343659 DF,  p-value: < 2.2e-16

Analisis modelo compuesto

Analisis Residuo

# Per-observation diagnostics of the multiple model (broom::augment); the
# .resid, .std.resid and .fitted columns are used by the checks that follow.
precioMultiple_resid <- augment(lm_precioMultiple)
precioMultiple_resid
# The mean of the residuals should be a number very close to zero.
mean(precioMultiple_resid[[".resid"]])
[1] -1.603159e-11

Como se puede apreciar, el promedio de todos los residuos es un número muy cercano a cero.

# Frequency polygon of the raw residuals of the multiple model.
# The residual column is mapped by its bare name so that the aesthetic is
# resolved inside the plot's data; referring to it as `data$column` inside
# aes() is discouraged by ggplot2 and raises a warning.
ggplot(precioMultiple_resid, aes(x = .resid)) +
  geom_freqpoly(binwidth = 1.5) +
  labs(
    title = "Poligono de frecuencia de los residuos",
    x = "Residuo",
    y = "count"
  )


# Normal QQ plot of the standardized residuals; geom_abline() with no
# arguments draws the y = x reference line.
ggplot(data = precioMultiple_resid, mapping = aes(sample = .std.resid)) +
  stat_qq() +
  geom_abline() +
  labs(
    title = "Normal QQ plot",
    x = "Valores teóricos",
    y = "Residuos estandarizados"
  )

Se quiere validar si los residuos siguen una distribución teórica N(0,1). Como podemos ver, en los extremos los residuos tienden a alejarse de la distribución Normal, por lo que se puede concluir que el modelo no está bien especificado.


# Residuals against fitted values, with a horizontal reference line at zero
# and a smoothed trend (no confidence band) to reveal leftover structure.
ggplot(
  data = precioMultiple_resid,
  mapping = aes(x = .fitted, y = .resid)
) +
  geom_point() +
  geom_hline(yintercept = 0) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Residuos versus el modelo ajustado",
    x = "valores fitted",
    y = "Residuos"
  )

Otro aspecto interesante para estudiar es si los residuos tienen o no una estructura definida. Lo que se observa es una clara estructura en el medio del gráfico; esto indica que se está perdiendo una parte sistemática del fenómeno, es decir, que el modelo no está funcionando como se esperaría.

Modelo Logaritmico

log(precio) = β0 + β1 banderaDescripcion + β2 log(medicion) + β3 pVentasC + β4 mCuadradoC + β5 marca

# banderaDescripcion + sucursalTipo + medicion + banderaDescripcion + pVentasC + mCuadradoC
# Copy of the working dataset with price and measurement on the natural-log
# scale; all other columns are left untouched.
preciosModelo_log <- dplyr::mutate(
  preciosModelo,
  precio = log(precio),
  medicion = log(medicion)
)

# Same specification as the multiple model, fitted on the log-transformed data.
lm_precioMultiple_log <- lm(
  precio ~ banderaDescripcion + medicion + pVentasC + mCuadradoC + marca,
  data = preciosModelo_log
)
summary(lm_precioMultiple_log)

Call:
lm(formula = precio ~ banderaDescripcion + medicion + pVentasC + 
    mCuadradoC + marca, data = preciosModelo_log)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.30022 -0.15570  0.00231  0.15488  1.76680 

Coefficients:
                                           Estimate Std. Error  t value Pr(>|t|)    
(Intercept)                               5.2465752  0.0175428  299.073  < 2e-16 ***
banderaDescripcionDisco                   0.0523972  0.0017439   30.045  < 2e-16 ***
banderaDescripcionExpress                -0.0305207  0.0026844  -11.370  < 2e-16 ***
banderaDescripcionHipermercado Carrefour -0.0521540  0.0031237  -16.696  < 2e-16 ***
banderaDescripcionJOSIMAR SUPERMERCADOS  -0.0433126  0.0078436   -5.522 3.35e-08 ***
banderaDescripcionJumbo                   0.0422233  0.0032465   13.006  < 2e-16 ***
banderaDescripcionMarket                 -0.0493066  0.0016789  -29.369  < 2e-16 ***
banderaDescripcionMi Changomas           -0.0300800  0.0096393   -3.121 0.001805 ** 
banderaDescripcionSupermercados DIA      -0.0503217  0.0026157  -19.239  < 2e-16 ***
banderaDescripcionVea                     0.0148221  0.0024643    6.015 1.80e-09 ***
medicion                                  0.0474396  0.0008331   56.943  < 2e-16 ***
pVentasCbajo                              0.0020275  0.0025423    0.798 0.425152    
pVentasCmedio                            -0.0004915  0.0016898   -0.291 0.771157    
mCuadradoCbajo                           -0.0006376  0.0017988   -0.354 0.722996    
mCuadradoCmedio                          -0.0032214  0.0013716   -2.349 0.018845 *  
marca7 UP                                -1.1539994  0.0216674  -53.260  < 2e-16 ***
marca9 DE ORO                            -2.0062854  0.0193610 -103.625  < 2e-16 ***
marcaACTIMEL                             -0.3208587  0.0260582  -12.313  < 2e-16 ***
marcaADES                                -1.4803950  0.0198400  -74.617  < 2e-16 ***
marcaƁGUILA                              -0.9287971  0.0203937  -45.543  < 2e-16 ***
marcaAIMƉ                                -0.2046459  0.0217557   -9.407  < 2e-16 ***
marcaALA                                 -0.8111670  0.0183853  -44.120  < 2e-16 ***
marcaALELUYA                             -0.9264400  0.0257247  -36.014  < 2e-16 ***
marcaALICANTE                            -1.5357676  0.0219142  -70.081  < 2e-16 ***
marcaALMA MORA                           -0.2237137  0.0214716  -10.419  < 2e-16 ***
marcaALWAYS                              -0.9632061  0.0258985  -37.192  < 2e-16 ***
marcaAMANDA                              -0.5447960  0.0256602  -21.231  < 2e-16 ***
marcaAMOR                                -1.0429921  0.0255981  -40.745  < 2e-16 ***
marcaAQUARIUS                            -1.4449102  0.0183514  -78.736  < 2e-16 ***
marcaARCOR                               -1.6392252  0.0178258  -91.958  < 2e-16 ***
marcaARIEL                                0.1929478  0.0205190    9.403  < 2e-16 ***
marcaARLISTƁN                            -0.2608174  0.0216140  -12.067  < 2e-16 ***
marcaARMONƍA                             -2.2040146  0.0218654 -100.799  < 2e-16 ***
marcaARROCITAS                           -1.4308773  0.0218717  -65.421  < 2e-16 ***
marcaAXE                                 -0.8085877  0.0187964  -43.018  < 2e-16 ***
marcaAYUDƍN                              -1.3480659  0.0188116  -71.661  < 2e-16 ***
marcaBAGLEY                              -1.1207625  0.0213159  -52.579  < 2e-16 ***
marcaBC                                  -2.8589195  0.0197366 -144.854  < 2e-16 ***
marcaBENJAMƍN                            -0.3583170  0.0220001  -16.287  < 2e-16 ***
marcaBIMBO                               -1.1622996  0.0255361  -45.516  < 2e-16 ***
marcaBLEM                                -0.4158622  0.0197053  -21.104  < 2e-16 ***
marcaBODEGA TRAPICHE                     -0.8225229  0.0253768  -32.412  < 2e-16 ***
marcaBOLS                                -0.4038059  0.0256610  -15.736  < 2e-16 ***
marcaBON O BON                           -2.1546456  0.0245710  -87.690  < 2e-16 ***
marcaBONAFIDE                            -1.0024718  0.0257890  -38.872  < 2e-16 ***
marcaBONAQUA                             -1.7809555  0.0217559  -81.861  < 2e-16 ***
marcaBRAHMA                              -1.0968801  0.0250045  -43.867  < 2e-16 ***
marcaBRANCA                               0.0119388  0.0213758    0.559 0.576489    
marcaBUDWEISER                           -0.9792220  0.0255773  -38.285  < 2e-16 ***
marcaBUTTER TOFFEES                      -1.2250475  0.0216566  -56.567  < 2e-16 ***
marcaCABRALES                            -0.5076868  0.0216619  -23.437  < 2e-16 ***
marcaCACHAMAI                            -1.5796366  0.0256400  -61.608  < 2e-16 ***
marcaCALLIA                              -0.5357679  0.0204441  -26.206  < 2e-16 ***
marcaCAMPARI                              0.2246875  0.0255159    8.806  < 2e-16 ***
marcaCAƑUELAS                            -1.3283866  0.0204252  -65.037  < 2e-16 ***
marcaCAPITƁN MORGAN                       0.4247435  0.0339414   12.514  < 2e-16 ***
marcaCAREFREE                            -0.4960337  0.0190571  -26.029  < 2e-16 ***
marcaCARIOCA                             -2.5974681  0.0257245 -100.973  < 2e-16 ***
marcaCARREFOUR                           -0.8688628  0.0412796  -21.048  < 2e-16 ***
marcaCASANCREM                           -0.9509495  0.0195276  -48.698  < 2e-16 ***
marcaCASANTO                             -1.7616566  0.0246664  -71.419  < 2e-16 ***
marcaCASTEL                              -0.6966323  0.0253776  -27.451  < 2e-16 ***
marcaCASTELL                             -1.5109279  0.0197863  -76.362  < 2e-16 ***
marcaCAT CHOW                            -0.9867196  0.0253572  -38.913  < 2e-16 ***
marcaCBSE                                -0.8272600  0.0202997  -40.752  < 2e-16 ***
marcaCELUSAL                             -1.6854197  0.0196312  -85.854  < 2e-16 ***
marcaCEPITA                              -1.8663587  0.0185075 -100.843  < 2e-16 ***
marcaCERAMICOL                           -0.9147630  0.0220002  -41.580  < 2e-16 ***
marcaCEREAL MIX                          -0.6462563  0.0196930  -32.817  < 2e-16 ***
marcaCEREALITAS                          -1.1968618  0.0217111  -55.127  < 2e-16 ***
marcaCHANDON                              0.3903256  0.0219633   17.772  < 2e-16 ***
marcaCHOCOLINAS                          -1.5875778  0.0215265  -73.750  < 2e-16 ***
marcaCIF                                 -1.3026681  0.0180941  -71.994  < 2e-16 ***
marcaCINDOR                              -1.2006211  0.0216567  -55.439  < 2e-16 ***
marcaCINZANO                             -0.4151771  0.0251488  -16.509  < 2e-16 ***
marcaCITRIC                              -1.1201624  0.0215622  -51.950  < 2e-16 ***
marcaCLIGHT                              -3.0738741  0.0184076 -166.990  < 2e-16 ***
marcaCOCA COLA                           -1.2328310  0.0186394  -66.141  < 2e-16 ***
marcaCOCINERO                            -0.4546462  0.0193850  -23.454  < 2e-16 ***
marcaCOLGATE                             -0.9793684  0.0183884  -53.260  < 2e-16 ***
marcaCOLON                               -0.7169816  0.0215628  -33.251  < 2e-16 ***
marcaCOMFORT                             -0.8186383  0.0247646  -33.057  < 2e-16 ***
marcaCOQUITAS                            -1.8188622  0.0253766  -71.675  < 2e-16 ***
marcaCORONA                              -1.1115120  0.0252233  -44.067  < 2e-16 ***
marcaCOTO                                -0.9610497  0.0397577  -24.173  < 2e-16 ***
marcaCRIOLLITAS                          -1.3127722  0.0196931  -66.662  < 2e-16 ***
marcaCRUSH                               -1.5708083  0.0216676  -72.496  < 2e-16 ***
marcaCRUZ DE MALTA                       -0.8240624  0.0217501  -37.888  < 2e-16 ***
marcaCUSENIER                            -0.4897582  0.0198441  -24.680  < 2e-16 ***
marcaCUTEX                               -1.2117025  0.0261522  -46.333  < 2e-16 ***
marcaDADA                                -0.2399010  0.0203221  -11.805  < 2e-16 ***
marcaDANETTE                             -1.4518986  0.0195290  -74.346  < 2e-16 ***
marcaDƁNICA                              -1.4766242  0.0253378  -58.277  < 2e-16 ***
marcaDANONINO                            -1.7118613  0.0195822  -87.419  < 2e-16 ***
marcaDƍA                                 -1.3482305  0.0667427  -20.200  < 2e-16 ***
marcaDOG CHOW                            -0.1573592  0.0204333   -7.701 1.35e-14 ***
marcaDON DAVID                            0.1436199  0.0257241    5.583 2.36e-08 ***
marcaDON SATUR                           -2.0849374  0.0203244 -102.583  < 2e-16 ***
marcaDON VICENTE                         -0.9998907  0.0197315  -50.675  < 2e-16 ***
marcaDOS ANCLAS                          -1.6911314  0.0182489  -92.670  < 2e-16 ***
marcaDOVE                                -0.9488375  0.0186009  -51.010  < 2e-16 ***
marcaDR LEMON                            -1.1871182  0.0219687  -54.037  < 2e-16 ***
marcaDRIVE                                0.2889805  0.0263210   10.979  < 2e-16 ***
marcaECHO                                -1.2092731  0.0253570  -47.690  < 2e-16 ***
marcaECO DE LOS ANDES                    -1.9357083  0.0264207  -73.265  < 2e-16 ***
marcaELEMENTOS                           -0.3058845  0.0220573  -13.868  < 2e-16 ***
marcaELITE                               -1.3233980  0.0205110  -64.521  < 2e-16 ***
marcaESTANCIA MENDOZA                    -0.8198669  0.0215114  -38.113  < 2e-16 ***
marcaESTRELLA                            -1.0696994  0.0198607  -53.860  < 2e-16 ***
marcaETCHART                             -0.7583687  0.0256193  -29.601  < 2e-16 ***
marcaEXPRESS                             -1.3592585  0.0200093  -67.931  < 2e-16 ***
marcaEXQUISITA                           -1.6589731  0.0181284  -91.512  < 2e-16 ***
marcaFANTA                               -1.1707195  0.0193208  -60.594  < 2e-16 ***
marcaFAVORITA                            -1.9739965  0.0219877  -89.778  < 2e-16 ***
marcaFINCA EL PORTILLO                   -0.2915730  0.0203933  -14.297  < 2e-16 ***
marcaFINCA FLICHMAN                      -0.4942011  0.0257676  -19.179  < 2e-16 ***
marcaFINCA LAS MORAS                     -0.3298260  0.0216837  -15.211  < 2e-16 ***
marcaFINCA NATALINA                      -0.2376413  0.0256190   -9.276  < 2e-16 ***
marcaFINLANDIA                           -1.0233920  0.0187878  -54.471  < 2e-16 ***
marcaFOND DE CAVE                         0.1273756  0.0253573    5.023 5.08e-07 ***
marcaFORMIS                              -1.1430363  0.0251868  -45.382  < 2e-16 ***
marcaFRIZEE                              -1.0029614  0.0215315  -46.581  < 2e-16 ***
marcaFRUTIGRAN                           -1.3750510  0.0253183  -54.311  < 2e-16 ***
marcaFUYÍ                                -1.4412207  0.0204799  -70.372  < 2e-16 ***
marcaGALLO                               -0.8046299  0.0196617  -40.924  < 2e-16 ***
marcaGALLO SNACKS                        -1.3713931  0.0248147  -55.265  < 2e-16 ***
marcaGANCIA                              -0.4767364  0.0217782  -21.891  < 2e-16 ***
marcaGATORADE                            -1.5216712  0.0188088  -80.902  < 2e-16 ***
marcaGENSER                              -0.6092270  0.0217670  -27.989  < 2e-16 ***
marcaGIACOMO                             -0.6433325  0.0204501  -31.459  < 2e-16 ***
marcaGILLETTE                            -0.3964803  0.0193519  -20.488  < 2e-16 ***
marcaGLACIAR                             -1.6427719  0.0215878  -76.097  < 2e-16 ***
marcaGOMES DA COSTA                      -0.7105046  0.0215728  -32.935  < 2e-16 ***
marcaGRANBY                              -1.2620609  0.0250049  -50.473  < 2e-16 ***
marcaGRANIX                              -1.4055022  0.0181559  -77.413  < 2e-16 ***
marcaGRANJA DEL SOL                      -0.6562000  0.0186061  -35.268  < 2e-16 ***
marcaGREEN HILLS                         -1.3507273  0.0202660  -66.650  < 2e-16 ***
marcaH2OH!                               -1.3189164  0.0204742  -64.418  < 2e-16 ***
marcaHARPIC                              -0.7875300  0.0215519  -36.541  < 2e-16 ***
marcaHEINEKEN                            -0.6992505  0.0246989  -28.311  < 2e-16 ***
marcaHELLMANN'S                          -1.4147374  0.0187365  -75.507  < 2e-16 ***
marcaHERBAL ESSENCES                     -0.8212641  0.0219754  -37.372  < 2e-16 ***
marcaHEREFORD                            -1.2025251  0.0214233  -56.132  < 2e-16 ***
marcaHIGIENOL                            -0.9166445  0.0198543  -46.169  < 2e-16 ***
marcaHILERET                             -0.9793820  0.0191942  -51.025  < 2e-16 ***
marcaHINDS                               -0.5988580  0.0258104  -23.202  < 2e-16 ***
marcaHIRAM WALKER                        -0.2030948  0.0253772   -8.003 1.22e-15 ***
marcaHOGAREÑAS                           -1.9132810  0.0256185  -74.684  < 2e-16 ***
marcaHUGGIES                             -0.4164240  0.0197329  -21.103  < 2e-16 ***
marcaIGUANA                              -1.2625556  0.0257238  -49.081  < 2e-16 ***
marcaIMPERIAL                            -0.9303262  0.0257891  -36.074  < 2e-16 ***
marcaISENBECK                            -1.2727263  0.0259437  -49.057  < 2e-16 ***
marcaJ&B                                  1.0652070  0.0251493   42.355  < 2e-16 ***
marcaJOHNSON'S                           -0.4638200  0.0205741  -22.544  < 2e-16 ***
marcaJORGITO                             -1.1420522  0.0218359  -52.302  < 2e-16 ***
marcaKELLOGGS                            -0.5234931  0.0248655  -21.053  < 2e-16 ***
marcaKESITAS                             -1.7563075  0.0217615  -80.707  < 2e-16 ***
marcaKILLKA                               0.0371854  0.0254558    1.461 0.144077    
marcaKIN                                 -1.6452476  0.0216893  -75.855  < 2e-16 ***
marcaKINDER                              -1.4916719  0.0223539  -66.730  < 2e-16 ***
marcaKNORR                               -1.6417419  0.0180892  -90.758  < 2e-16 ***
marcaKNORR QUICK                         -1.1793244  0.0219502  -53.727  < 2e-16 ***
marcaKOLYNOS                             -1.2609304  0.0252611  -49.916  < 2e-16 ***
marcaKOTEX                               -0.3645838  0.0193541  -18.838  < 2e-16 ***
marcaKRACHITOS                           -1.5809479  0.0197036  -80.236  < 2e-16 ***
marcaLA CAMPAGNOLA                       -1.1950844  0.0182385  -65.525  < 2e-16 ***
marcaLA MERCED                           -0.5910471  0.0252803  -23.380  < 2e-16 ***
marcaLA MORENITA                         -0.6828839  0.0219445  -31.119  < 2e-16 ***
marcaLA SALTEÑA                          -1.0657972  0.0193251  -55.151  < 2e-16 ***
marcaLA SERENÍSIMA                       -1.2968376  0.0178081  -72.823  < 2e-16 ***
marcaLA TRANQUERA                        -1.1128884  0.0204633  -54.384  < 2e-16 ***
marcaLA VIRGINIA                         -1.2125046  0.0189847  -63.867  < 2e-16 ***
marcaLACTAL                              -1.5388014  0.0217556  -70.731  < 2e-16 ***
marcaLATITUD 33                          -0.2233877  0.0204447  -10.926  < 2e-16 ***
marcaLAYS                                -0.5644608  0.0249171  -22.654  < 2e-16 ***
marcaLEVITÉ                              -1.1683490  0.0186952  -62.495  < 2e-16 ***
marcaLINCOLN                             -1.9149601  0.0215469  -88.874  < 2e-16 ***
marcaLORD CHESELINE                      -0.9517082  0.0251858  -37.787  < 2e-16 ***
marcaLOS ÁRBOLES                         -0.4062372  0.0216782  -18.739  < 2e-16 ***
marcaLUCCHETTI                           -1.2841314  0.0180134  -71.287  < 2e-16 ***
marcaLYSOFORM                            -0.9607698  0.0187548  -51.228  < 2e-16 ***
marcaMAGGI                               -1.5044012  0.0217615  -69.131  < 2e-16 ***
marcaMAGISTRAL                           -1.0694373  0.0196719  -54.364  < 2e-16 ***
marcaMAIZENA                             -1.4584429  0.0219135  -66.554  < 2e-16 ***
marcaMANÁ                                -1.6366391  0.0203857  -80.284  < 2e-16 ***
marcaMARUCHAN                            -1.4961990  0.0221975  -67.404  < 2e-16 ***
marcaMATARAZZO                           -1.3365896  0.0181644  -73.583  < 2e-16 ***
marcaMAYOLIVA                            -1.6437218  0.0256192  -64.160  < 2e-16 ***
marcaMAZOLA                              -0.6088143  0.0253768  -23.991  < 2e-16 ***
marcaMC CAIN                             -0.5755904  0.0253380  -22.716  < 2e-16 ***
marcaMEDIA TARDE                         -1.6740623  0.0252800  -66.221  < 2e-16 ***
marcaMELBA                               -1.9384968  0.0251675  -77.024  < 2e-16 ***
marcaMELITAS                             -1.8572739  0.0259440  -71.588  < 2e-16 ***
marcaMELLIZAS                            -1.5108915  0.0218362  -69.192  < 2e-16 ***
marcaMENDICRIM                           -1.0289421  0.0218601  -47.069  < 2e-16 ***
marcaMENOYO                              -1.6207751  0.0196227  -82.597  < 2e-16 ***
marcaMERENGADAS                          -1.5769607  0.0215722  -73.101  < 2e-16 ***
marcaMICHEL TORINO                       -1.3937570  0.0216784  -64.292  < 2e-16 ***
marcaMILLER                              -0.7113353  0.0253966  -28.009  < 2e-16 ***
marcaMINERVA                             -0.9413224  0.0255770  -36.803  < 2e-16 ***
 [ reached getOption("max.print") -- omitted 107 rows ]
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.3407 on 343659 degrees of freedom
Multiple R-squared:  0.7687,    Adjusted R-squared:  0.7684 
F-statistic:  3731 on 306 and 343659 DF,  p-value: < 2.2e-16
# Per-observation table for the log model (the .fitted, .resid and .std.resid
# columns are used by the diagnostic plots that follow), displayed and then
# summarised by the mean residual.
lm_precioMultiple_log_resid <- lm_precioMultiple_log %>% augment()
lm_precioMultiple_log_resid
lm_precioMultiple_log_resid[[".resid"]] %>% mean()
[1] 1.234472e-12
# Frequency polygon of the residuals of the log model.
# - The column is mapped by name; `df$col` inside aes() bypasses the plot's
#   data and is discouraged by ggplot2.
# - The model summary printed above reports a residual standard error of about
#   0.34, so the previous bin width of 2.5 put nearly every residual in one or
#   two bins; 0.05 resolves the shape of the distribution.
# - The unused `fill` label (it named an unrelated object and freqpoly has no
#   fill) was dropped.
ggplot(lm_precioMultiple_log_resid, aes(x = .resid)) +
  geom_freqpoly(binwidth = 0.05) +
  labs(title = "Poligono de frecuencia de los residuos", x = "Residuo", y = "count")

# Normal Q-Q plot of the standardized residuals of the log model, with
# geom_abline()'s default reference line (intercept 0, slope 1).
lm_precioMultiple_log_resid %>%
  ggplot(mapping = aes(sample = .std.resid)) +
  geom_qq() +
  geom_abline() +
  labs(
    title = "Normal QQ plot log",
    x = "Valores teóricos",
    y = "Residuos estandarizados"
  )

Lo que se observa en este grafico, es que si bien en los extremos la tendencia es alejarse de la recta, los valores estan mucho mas pegados a ella que en el modelo anterior, lo mismo ocurre con los valores intermedios que estan practicamente sobre la recta. Por lo antes explicado, este modelo esta mejor definido que el anterior.

# Residuals against fitted values for the log model, with a zero reference
# line and a smoothed trend (no confidence band).
lm_precioMultiple_log_resid %>%
  ggplot(mapping = aes(x = .fitted, y = .resid)) +
  geom_point() +
  geom_hline(yintercept = 0) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Residuos versus el modelo ajustado log",
    x = "valores fitted",
    y = "Residuos"
  )

Si bien en este caso la diferencia no es tan notoria como en el analisis anterior, se puede apreciar que los residuos no estan formando una figura tan concentrada como en el caso no logaritmico, dando una mejora al modelo en este caso. Repasando el articulo sobre la aplicacion de logaritmos para el estudio, este nuevo modelo con logaritmos podria considerarse un hibrido entre un modelo log-nivel para las covariables que no se modificaron y un modelo log-log para aquellas que si lo fueron.

Coeficientes estimados y sus p-valores asociados


# Coefficient tables, including confidence intervals, for the level model and
# the log model.
lineal_coef <- lm_precioMultiple %>% tidy(conf.int = TRUE)
lineal_coef_log <- lm_precioMultiple_log %>% tidy(conf.int = TRUE)

# Point estimate and confidence interval per term for the level model.
# geom_pointrange() already draws the point estimate, so the separate
# geom_point() layer (fully hidden underneath it) was removed.
ggplot(lineal_coef, aes(term, estimate)) +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
  labs(title = "Coeficientes de la regresion lineal", x = "", y = "Estimacion e Int. Confianza") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))


# Same plot for the log model. The title now names the model; before, both
# figures carried the same title and could not be told apart.
ggplot(lineal_coef_log, aes(term, estimate)) +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
  labs(title = "Coeficientes de la regresion lineal log", x = "", y = "Estimacion e Int. Confianza") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

P-Valor de los regresores

# P-value of every term of the level model, ordered from largest to smallest,
# with a horizontal reference line at 0.05 and bars coloured by p-value.
lineal_coef %>%
  ggplot(aes(x = reorder(term, -p.value), y = p.value, fill = p.value)) +
  geom_col() +
  geom_hline(yintercept = 0.05) +
  labs(
    title = "P-valor de los regresores para multiple",
    x = "",
    y = "P-valor"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_gradient2(
    low = "forestgreen",
    mid = "yellow2",
    high = "firebrick",
    midpoint = 0.5
  )



# P-value of every term of the log model, ordered from largest to smallest,
# with a horizontal reference line at 0.05 and bars coloured by p-value.
lineal_coef_log %>%
  ggplot(aes(x = reorder(term, -p.value), y = p.value, fill = p.value)) +
  geom_col() +
  geom_hline(yintercept = 0.05) +
  labs(
    title = "P-valor de los regresores para multiple log",
    x = "",
    y = "P-valor"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_gradient2(
    low = "forestgreen",
    mid = "yellow2",
    high = "firebrick",
    midpoint = 0.5
  )

Evaluacion de ambos modelos

# One-row fit summaries (R-squared, adjusted R-squared, overall p-value) for
# each linear model, stacked and labelled for comparison.
multiple <- glance(lm_precioMultiple) %>%
  select(r.squared, adj.r.squared, p.value)
multiple_log <- glance(lm_precioMultiple_log) %>%
  select(r.squared, adj.r.squared, p.value)
bind_rows(multiple, multiple_log) %>%
  mutate(modelo = c("multiple", "multiple_log"))
NA

Ridge: α=0

Lasso: α=1

Elastic Net: 0<α<1

Lasso

# Earlier variant built from the training split, kept for reference:
# prod_precios = precios_train$precio
# prod_mtx = model.matrix(precio~ banderaDescripcion + sucursalTipo + medicion + pVentasC + mCuadradoC, data = precios_train)

# Response vector: the precio column of preciosModelo_log.
prod_precios <- preciosModelo_log$precio
# Design matrix of the regressors, built from the same data frame.
prod_mtx <- model.matrix(
  precio ~ banderaDescripcion + medicion + pVentasC + mCuadradoC + marca,
  data = preciosModelo_log
)

# Lasso path (alpha = 1) over glmnet's lambda sequence.
# standardize is TRUE, as in lasso_cv, lasso_opt and ridge.mod: the lambda
# chosen by cross-validation is later drawn on this path's deviance curve, and
# lambda values are only comparable between fits that scale the regressors
# the same way.
lasso.mod <- glmnet(
  x = prod_mtx, # regressor matrix
  y = prod_precios, # response vector
  alpha = 1, # 1 = lasso penalty
  standardize = TRUE
)

# Long-format coefficient table of the path (term, lambda, estimate, dev.ratio
# are the columns used further down).
lasso_coef <- lasso.mod %>% tidy()

lasso_coef
NA

Grafico de coeficientes en funcion del lambda

Grafico de coeficientes en funcion de la norma de penalizacion

# Base-graphics coefficient paths of the lasso fit: first with lambda on the
# x-axis, then with the plot method's default x-axis.
plot(lasso.mod, xvar = "lambda")

plot(x = lasso.mod)

# ggplot versions of the lasso coefficient paths against log(lambda), one line
# per term: g1 keeps every term, g2 leaves out the intercept.

g1 <- lasso_coef %>%
  ggplot(aes(x = log(lambda), y = estimate, group = term, color = term)) +
  geom_line() +
  theme_bw() +
  theme(legend.position = "none") +
  labs(title = "Lasso con Intercepto", y = "Coeficientes")

g2 <- lasso_coef %>%
  filter(term != "(Intercept)") %>%
  ggplot(aes(x = log(lambda), y = estimate, group = term, color = term)) +
  geom_line() +
  theme_bw() +
  theme(legend.position = "none") +
  labs(title = "Lasso sin Intercepto", y = "Coeficientes")

# Both panels side by side.
plot_grid(g1, g2)

Cross Validation para LASSO

# Cross-validated lasso over the lambda path. Fold assignment is random, so
# the selected lambdas can change between runs unless a seed is set first.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
lasso_cv <- cv.glmnet(
  x = prod_mtx,
  y = prod_precios,
  alpha = 1,
  standardize = TRUE
)
lasso_cv

Call:  cv.glmnet(x = prod_mtx, y = prod_precios, alpha = 1, standardize = T) 

Measure: Mean-Squared Error 

       Lambda Measure        SE Nonzero
min 2.039e-05  0.1166 0.0004265     306
1se 8.232e-05  0.1170 0.0004295     305
# Cross-validation curve of the lasso.
plot(x = lasso_cv)

El gráfico nos muestra la media del MSE con su limite superior e inferior y la cantidad de variables que sobreviven para cada valor de lambda.

# Cross-validation results as a data frame.
tidy(lasso_cv)
NA
# Minimum-error lambda and the lambda one standard error away from it.
glance(lasso_cv)
NA
# Lambda selected by cross-validation (the lambda.min entry of lasso_cv).
lasso_lambda_opt <- lasso_cv[["lambda.min"]]

# Lasso refitted at that single lambda, with standardized regressors.
lasso_opt <- glmnet(
  x = prod_mtx, # regressor matrix
  y = prod_precios, # response vector
  alpha = 1, # 1 = lasso penalty
  standardize = TRUE,
  lambda = lasso_lambda_opt
)

# Standard printout (disabled)
# lasso_opt
# Tidy coefficient table
tidy(lasso_opt)
NA

Las variables explican el 76 % del deviance.

RIDGE

α=0

# Ridge path (alpha = 0) over glmnet's lambda sequence, with standardized
# regressors.
ridge.mod <- glmnet(
  x = prod_mtx, # regressor matrix
  y = prod_precios, # response vector
  alpha = 0, # 0 = ridge penalty
  standardize = TRUE
)
# Long-format coefficient table of the path
ridge_coef <- tidy(ridge.mod)

ridge_coef
NA

# Base-graphics coefficient paths of the ridge fit: first with lambda on the
# x-axis, then with the plot method's default x-axis.
plot(ridge.mod, xvar = "lambda")

plot(x = ridge.mod)

NA
NA

# ggplot versions of the ridge coefficient paths against log(lambda), one line
# per term: g1 keeps every term, g2 leaves out the intercept.
g1 <- ridge_coef %>%
  ggplot(aes(x = log(lambda), y = estimate, group = term, color = term)) +
  geom_line() +
  theme_bw() +
  theme(legend.position = "none") +
  labs(title = "Ridge con Intercepto", y = "Coeficientes")

g2 <- ridge_coef %>%
  filter(term != "(Intercept)") %>%
  ggplot(aes(x = log(lambda), y = estimate, group = term, color = term)) +
  geom_line() +
  theme_bw() +
  theme(legend.position = "none") +
  labs(title = "Ridge sin Intercepto", y = "Coeficientes")

# Both panels side by side.
plot_grid(g1, g2)

Elección lambda óptimo

# Cross-validated ridge over the lambda path. Fold assignment is random, so
# the selected lambda can change between runs unless a seed is set first.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
ridge_cv <- cv.glmnet(
  x = prod_mtx,
  y = prod_precios,
  alpha = 0,
  standardize = TRUE
)
plot(ridge_cv)

Seleccion lambda óptimo para crear el modelo final

# Lambda selected by cross-validation (the lambda.min entry of ridge_cv).
ridge_lambda_opt <- ridge_cv[["lambda.min"]]

# Ridge refitted at that single lambda, with standardized regressors.
ridge_opt <- glmnet(
  x = prod_mtx, # regressor matrix
  y = prod_precios, # response vector
  alpha = 0, # 0 = ridge penalty
  standardize = TRUE,
  lambda = ridge_lambda_opt
)

# Standard printout (disabled)
# ridge_opt
tidy(ridge_opt)
NA

Comparación entre Lasso y Ridge


# Deviance ratio against log(lambda) for each model, taken from the unique
# (lambda, dev.ratio) pairs of the tidy coefficient tables. The vertical line
# marks the lambda selected by cross-validation.
ridge_dev <- ggplot(
  distinct(select(ridge_coef, lambda, dev.ratio)),
  aes(log(lambda), dev.ratio)
) +
  geom_point() +
  geom_line() +
  geom_vline(
    xintercept = log(ridge_lambda_opt),
    color = 'steelblue',
    size = 1.5
  ) +
  labs(title = 'Ridge: Deviance') +
  theme_bw()

lasso_dev <- ggplot(
  distinct(select(lasso_coef, lambda, dev.ratio)),
  aes(log(lambda), dev.ratio)
) +
  geom_point() +
  geom_line() +
  geom_vline(
    xintercept = log(lasso_lambda_opt),
    color = 'firebrick',
    size = 1.5
  ) +
  labs(title = 'Lasso: Deviance') +
  theme_bw()

# Ridge on the left, lasso on the right
plot_grid(ridge_dev, lasso_dev)

NA
NA

Comparación de la relación entre el porcentaje de deviance explicada y lambda para los tres tipos de modelos que realizamos.

Comparación de modelos

ridge_opt , lasso_opt, lm_precioMultiple_log

precios_train - precios_test

#' Compute RMSE and R-squared for a set of predictions.
#'
#' @param true Numeric vector of observed values.
#' @param predicted Numeric predictions; a vector or a one-column matrix both
#'   work because only element-wise differences and their sum are used.
#' @param df Optional object whose `nrow()` is used as the number of
#'   observations in the RMSE denominator. When `NULL` (the default) the
#'   number of residuals is used, so callers no longer need to pass a data
#'   frame whose row count happens to match the predictions.
#' @return A one-row data.frame with columns `RMSE` and `Rsquare`.
eval_results <- function(true, predicted, df = NULL) {
  resid_vec <- predicted - true
  sse <- sum(resid_vec^2)
  sst <- sum((true - mean(true))^2)

  # Keep honouring an explicitly supplied df for backward compatibility
  n_obs <- if (is.null(df)) length(resid_vec) else nrow(df)

  # Model performance metrics
  data.frame(
    RMSE = sqrt(sse / n_obs),
    Rsquare = 1 - sse / sst
  )
}
# Lasso: prediction and evaluation on the training data.
# lasso_opt was fitted with prod_precios as the response, so its predictions
# are scored against prod_precios rather than preciosModelo$precio: the
# notebook builds prod_precios from the log-transformed prices, and comparing
# log-scale predictions with raw prices distorts both RMSE and R-squared.
# nrow(prod_mtx) is the number of rows that were predicted.
predictions_train <- predict(lasso_opt, s = lasso_lambda_opt, newx = prod_mtx)
eval_results(prod_precios, predictions_train, prod_mtx)
NA
NA
# Ridge: prediction and evaluation on the training data.
# ridge_opt was fitted with prod_precios as the response, so its predictions
# are scored against prod_precios rather than preciosModelo$precio: the
# notebook builds prod_precios from the log-transformed prices, and comparing
# log-scale predictions with raw prices distorts both RMSE and R-squared.
# nrow(prod_mtx) is the number of rows that were predicted.
predictions_train <- predict(ridge_opt, s = ridge_lambda_opt, newx = prod_mtx)
eval_results(prod_precios, predictions_train, prod_mtx)
NA
LS0tCnRpdGxlOiAiVHJhYmFqbyBFc3BlY2lhbGl6YWNpb24gVjEuMC4wIgphdXRob3I6ICJJZ25hY2lvIENoaWFwZWxsYSIKb3V0cHV0OgogIGh0bWxfZG9jdW1lbnQ6CiAgICBkZl9wcmludDogcGFnZWQKICAgIHRvYzogeWVzCiAgaHRtbF9ub3RlYm9vazoKICAgIGRmX3ByaW50OiBwYWdlZAogICAgdGhlbWU6IHNwYWNlbGFiCiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OiB5ZXMKICBwZGZfZG9jdW1lbnQ6CiAgICB0b2M6IHllcwotLS0KCjxzdHlsZSB0eXBlPSJ0ZXh0L2NzcyI+CmRpdi5tYWluLWNvbnRhaW5lciB7CiAgbWF4LXdpZHRoOiAxNjAwcHg7CiAgbWFyZ2luLWxlZnQ6IGF1dG87CiAgbWFyZ2luLXJpZ2h0OiBhdXRvOwp9Cjwvc3R5bGU+CgojIyBMaWJyZXJpYXMKYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KbGlicmFyeShjbHVzdGVyKQpsaWJyYXJ5KGJyb29tKQpsaWJyYXJ5KGdsbW5ldCkKbGlicmFyeShtb2RlbHIpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShjb3dwbG90KQoKbGlicmFyeShtb25nb2xpdGUpCmxpYnJhcnkoZ2dtYXApCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoc3ApCmxpYnJhcnkobHVicmlkYXRlKQpsaWJyYXJ5KHRpZHlyKQpsaWJyYXJ5KHJlc2hhcGUyKQpsaWJyYXJ5KHN0cmluZ3IpCmBgYAoKCmBgYHtyfQojIGJhcnJpb3NDYXRlZ29yaWNvcyA9IGlubmVyX2pvaW4obWN1YWRyYWRvLGFuYWxpc2lzX0JhcnJpb3MgLCBieT0iYmFycmlvIiklPiVzZWxlY3QoYmFycmlvLCBtQ3VhZHJhZG9DLCBwVmVudGFzQykKIyBwcmVjaW9zTW9kZWxvID0gaW5uZXJfam9pbihwcmVjaW9zLGJhcnJpb3NDYXRlZ29yaWNvcyAsIGJ5PSJiYXJyaW8iKQojIHByZWNpb3NNb2RlbG8gPSBpbm5lcl9qb2luKHByZWNpb3NNb2RlbG8scHJvZHVjdG9zICwgYnk9YygicHJvZHVjdG8iPSJpZCIpKQojIHByZWNpb3NNb2RlbG8gPSBzZWxlY3QgKHByZWNpb3NNb2RlbG8sLWMoZmVjaGEsbm9tYnJlLHByZXNlbnRhY2lvbixwcm9kdWN0bykpCiMgcHJlY2lvc01vZGVsbwojIAojIAojIHdyaXRlLmNzdihwcmVjaW9zTW9kZWxvLCIvaG9tZS9pZ25hY2lvL2RhdG9zL2ZhY3VsdGFkL3JlcG9zL3RwRXNwZWNpYWxpemFjaW9uL2RhdGEvcHJlY2lvc01vZGVsby5jc3YiLCByb3cubmFtZXMgPSBUUlVFKQpgYGAKCmBgYHtyLCBmaWcud2lkdGg9OCxmaWcuaGVpZ2h0PTh9CnByZWNpb3NNb2RlbG8gPC0gcmVhZC5jc3YoZmlsZSA9ICcvaG9tZS9pZ25hY2lvL2RhdG9zL2ZhY3VsdGFkL3JlcG9zL3RwRXNwZWNpYWxpemFjaW9uL2RhdGEvcHJlY2lvc01vZGVsby5jc3YnKQpwcmVjaW9zTW9kZWxvID0gc2VsZWN0IChwcmVjaW9zTW9kZWxvLC1jKFgsYmFycmlvLHN1Y3Vyc2FsLHN1Y3Vyc2FsVGlwbykpCnByZWNpb3NNb2RlbG8gPC0gcHJlY2lvc01vZGVsbyAlPiUgcmVzYW1wbGVfcGFydGl0aW9uKGModHJhaW49MC4zLHRlc3Q9MC43KSkKcHJlY2lvc01vZGVsbyA9IHByZWNpb3NNb2RlbG8kdHJhaW4gJT4lIGFzX3Rp
YmJsZSgpCmBgYAoKCmBgYHtyLCBmaWcud2lkdGg9OCxmaWcuaGVpZ2h0PTh9CnRyYWluX3Rlc3QgPC0gcHJlY2lvc01vZGVsbyAlPiUgcmVzYW1wbGVfcGFydGl0aW9uKGModHJhaW49MC4zLHRlc3Q9MC43KSkKcHJlY2lvc190cmFpbiA8LSB0cmFpbl90ZXN0JHRyYWluICU+JSBhc190aWJibGUoKQpwcmVjaW9zX3Rlc3QgPC0gdHJhaW5fdGVzdCR0ZXN0ICU+JSBhc190aWJibGUoKQpgYGAKCiMjIyBNb2RlbG8gbGluZWFsIHNpbXBsZSBwYXJhIGV4cGxpY2FyIGVsIHByZWNpbyBlbiBmdW5jacOzbiBkZSBsYSBiYW5kZXJhIHN1cGVybWVyY2FkbwpgYGB7cn0KIyBiYW5kZXJhRGVzY3JpcGNpb24gKyBtZWRpY2lvbiArIGJhcnJpbyArIGJhbmRlcmFEZXNjcmlwY2lvbiArIHBWZW50YXNDICsgbUN1YWRyYWRvQwpsbV9wcmVjaW8yYmFuZGVyYSA9IGxtKGZvcm11bGEgPSBwcmVjaW9+YmFuZGVyYURlc2NyaXBjaW9uLCBkYXRhPXByZWNpb3NNb2RlbG8pCmxtX3ByZWNpbzJtZWRpY2lvbiA9IGxtKGZvcm11bGEgPSBwcmVjaW9+bWVkaWNpb24sIGRhdGE9cHJlY2lvc01vZGVsbykKI2xtX3ByZWNpbzJiYXJyaW8gPSBsbShmb3JtdWxhID0gcHJlY2lvfmJhcnJpbywgZGF0YT1wcmVjaW9zKQpgYGAKCiMjIyAgQW5hbGlzaXMgZGUgcHJlY2lvIHBvciBCYXJyaW8KYGBge3J9CnN1bW1hcnkobG1fcHJlY2lvMmJhbmRlcmEpCiNjb2VmKGxtX3ByZWNpbzJiYW5kZXJhKQpgYGAKCmBgYHtyfQpzdW1tYXJ5KGxtX3ByZWNpbzJtZWRpY2lvbikKCmBgYAoKCgoKYGBge3J9CmdsYW5jZShsbV9wcmVjaW8yYmFuZGVyYSkKZ2xhbmNlKGxtX3ByZWNpbzJtZWRpY2lvbikKYGBgCgoKYGBge3J9CiMgYmFuZGVyYURlc2NyaXBjaW9uICsgc3VjdXJzYWxUaXBvICsgbWVkaWNpb24gKyBwVmVudGFzQyArIG1DdWFkcmFkb0MKbG1fcHJlY2lvTXVsdGlwbGUgPSBsbShwcmVjaW8gfiBiYW5kZXJhRGVzY3JpcGNpb24gKyBtZWRpY2lvbiArIHBWZW50YXNDICsgbUN1YWRyYWRvQyArIG1hcmNhLCBkYXRhPXByZWNpb3NNb2RlbG8pCmBgYAoKYGBge3J9CiMgbWVkaWNpb24gLSBiYXJyaW8gLSBzdWN1cnNhbFRpcG8gLSBiYW5kZXJhRGVzY3JpcGNpb24Kc3VtbWFyeShsbV9wcmVjaW9NdWx0aXBsZSkKYGBgCgojIyMgQW5hbGlzaXMgbW9kZWxvIGNvbXB1ZXN0bwojIyMjIEFuYWxpc2lzIFJlc2lkdW8KCmBgYHtyfQpwcmVjaW9NdWx0aXBsZV9yZXNpZCA9IGF1Z21lbnQobG1fcHJlY2lvTXVsdGlwbGUpCnByZWNpb011bHRpcGxlX3Jlc2lkCmBgYAoKYGBge3J9CiNFbCBwcm9tZWRpbyBkZSBsb3MgcmVzaWR1b3MgZGViZSBzZXIgdW4gbnVtZXJvIG11eSBjZXJjYW5vIGEgY2VybwptZWFuKHByZWNpb011bHRpcGxlX3Jlc2lkJC5yZXNpZCkKYGBgCgpDb21vIHNlIHB1ZWRlIGFwcmVjaWFyIGVsIHZhbG9yIG9idGVuaWRvIGRlbCBwcm9tZWRpbyBkZSB0b2RvcyBsb3MgcmVzaWR1b3MsIGVzIHVuIG51bWVybyBjZXJjYW5vIGEgY2Vyby4KCmBgYHtyLCBmaWcud2lkdGg9OCxm
aWcuaGVpZ2h0PTh9CmdncGxvdChwcmVjaW9NdWx0aXBsZV9yZXNpZCwgYWVzKHByZWNpb011bHRpcGxlX3Jlc2lkJC5yZXNpZCkpICsgCiAgZ2VvbV9mcmVxcG9seShiaW53aWR0aCA9IDEuNSkrCiAgbGFicyhmaWxsID0gInByZWNpb011bHRpcGxlX3Jlc2lkJC5yZXNpZCIsIHRpdGxlID0gIlBvbGlnb25vIGRlIGZyZWN1ZW5jaWEgZGUgbG9zIHJlc2lkdW9zIiwgeCA9ICJSZXNpZHVvIiwgeSA9ICJjb3VudCIpCmBgYAoKYGBge3IsIGZpZy53aWR0aD04LGZpZy5oZWlnaHQ9OH0KCmdncGxvdChwcmVjaW9NdWx0aXBsZV9yZXNpZCwgYWVzKHNhbXBsZT0gLnN0ZC5yZXNpZCkpKwogIHN0YXRfcXEoKSsKICBnZW9tX2FibGluZSgpKwogIGxhYnModGl0bGUgPSAiTm9ybWFsIFFRIHBsb3QiLCB4ID0gIlZhbG9yZXMgdGXDs3JpY29zIiwgeSA9ICJSZXNpZHVvcyBlc3RhbmRhcml6YWRvcyIpCgpgYGAKClNlIHF1aWVyZSB2YWxpZGFyLCBzaSBsb3MgcmVzaWR1b3Mgc2lndWllbiB1bmEgZGlzdHJpYnVjaW9uIHRlb3JpY2EsIE4oMCwxKS4KQ29tbyBwb2RlbW9zIHZlciBlbCBtb2RlbG8gZW4gbG9zIGV4dHJlbW9zIHRpZW5kZSBhIGFsZWphcnNlIGRlIGxhIGRpc3RyaWJ1Y2lvbiBOb3JtYWwsIHBvciBsbyBxdWUgcHVlZG8gY29uY2x1aXIgcXVlIGVsIG1vZGVsbyBubyBlc3RhIGJpZW4gZGVmaW5pZG8uCgoKYGBge3IsIGZpZy53aWR0aD04LGZpZy5oZWlnaHQ9OH0KCmdncGxvdChwcmVjaW9NdWx0aXBsZV9yZXNpZCwgYWVzKC5maXR0ZWQsIC5yZXNpZCkpICsKICBnZW9tX3BvaW50KCkrCiAgZ2VvbV9obGluZSh5aW50ZXJjZXB0ID0gMCkgKwogIGdlb21fc21vb3RoKHNlID0gRkFMU0UpKwogICAgbGFicyh0aXRsZSA9ICJSZXNpZHVvcyB2ZXJzdXMgZWwgbW9kZWxvIGFqdXN0YWRvIiwgeCA9ICJ2YWxvcmVzIGZpdHRlZCIsIHkgPSAiUmVzaWR1b3MiKQoKYGBgCgoKCk90cm8gY2FzbyBpbnRlcmVzYW5kbyBwYXJhIGVzdHVkaWFyLCBlcyBzaSBsb3MgcmVzaWR1b3MgdGllbmVuIG8gbm8gdW5hIGVzdHJ1Y3R1cmEgZGVmaW5pZGEuCkxvIHF1ZSBzZSBvYnNldmEgZXMgdW5hIGNsYXJhIGVzdHJ1Y3R1cmEgZW4gZWwgbWVkaW8gZGVsIGdyYWZpY28sIGVzdG8gZXN0YSBpbmRpY2FuZG8gcXVlIHVuYSBwYXJ0ZSBzaXN0ZW3DoXRpY2EgZGVsIGZlbsOzbWVubyBxdWUgc2UgZXN0YSBwZXJkaWVuZG8sIGxvIGN1YWwgaW5kaWNhIHF1ZSBlbCBtb2RlbG8gbm8gZXN0YSBmdW5jaW9uYW5kbyBjb21vIHNlIGVzcGVyYXJpYS4KCgojIyMgTW9kZWxvIExvZ2FyaXRtaWNvCiMjIyBsb2cocHJpY2UpPc6yMCvOsjFsb2cocm9vbXMpK86yMmxvZyhiYXRocm9vbXMpK86yM2xvZyhzdXJmYWNlX2NvdmVyZWQpK86yNHByb3BlcnR5X3R5cGUrzrI1YmFycmlvK86yNnN1cmZhY2VfcGF0aW8KCmBgYHtyfQojIGJhbmRlcmFEZXNjcmlwY2lvbiArIHN1Y3Vyc2FsVGlwbyArIG1lZGljaW9uICsgYmFuZGVyYURlc2NyaXBjaW9uICsgcFZlbnRhc0MgKyBtQ3VhZHJhZG9D
CnByZWNpb3NNb2RlbG9fbG9nICAgICAgICAgICAgICAgICAgICA9IHByZWNpb3NNb2RlbG8KcHJlY2lvc01vZGVsb19sb2ckcHJlY2lvICAgICAgICAgICAgID0gbG9nKHByZWNpb3NNb2RlbG9fbG9nJHByZWNpbykKcHJlY2lvc01vZGVsb19sb2ckbWVkaWNpb24gICAgICAgICAgID0gbG9nKHByZWNpb3NNb2RlbG9fbG9nJG1lZGljaW9uKQoKbG1fcHJlY2lvTXVsdGlwbGVfbG9nID0gbG0ocHJlY2lvIH4gYmFuZGVyYURlc2NyaXBjaW9uICsgbWVkaWNpb24gKyBwVmVudGFzQyArIG1DdWFkcmFkb0MgKyBtYXJjYSwgZGF0YT1wcmVjaW9zTW9kZWxvX2xvZykKCgpgYGAKCmBgYHtyfQpzdW1tYXJ5KGxtX3ByZWNpb011bHRpcGxlX2xvZykKYGBgCgpgYGB7cn0KbG1fcHJlY2lvTXVsdGlwbGVfbG9nX3Jlc2lkID0gYXVnbWVudChsbV9wcmVjaW9NdWx0aXBsZV9sb2cpCmxtX3ByZWNpb011bHRpcGxlX2xvZ19yZXNpZApgYGAKCmBgYHtyfQptZWFuKGxtX3ByZWNpb011bHRpcGxlX2xvZ19yZXNpZCQucmVzaWQpCmBgYAoKYGBge3IsIGZpZy53aWR0aD04LGZpZy5oZWlnaHQ9OH0KZ2dwbG90KGxtX3ByZWNpb011bHRpcGxlX2xvZ19yZXNpZCwgYWVzKGxtX3ByZWNpb011bHRpcGxlX2xvZ19yZXNpZCQucmVzaWQpKSArIAogIGdlb21fZnJlcXBvbHkoYmlud2lkdGggPSAyLjUpKwogIGxhYnMoZmlsbCA9ICJwcm9waWVkYWRlc19yZXNpZCQucmVzaWQiLCB0aXRsZSA9ICJQb2xpZ29ubyBkZSBmcmVjdWVuY2lhIGRlIGxvcyByZXNpZHVvcyIsIHggPSAiUmVzaWR1byIsIHkgPSAiY291bnQiKQpgYGAKCgoKYGBge3IsIGZpZy53aWR0aD04LGZpZy5oZWlnaHQ9OH0KZ2dwbG90KGxtX3ByZWNpb011bHRpcGxlX2xvZ19yZXNpZCwgYWVzKHNhbXBsZT0gLnN0ZC5yZXNpZCkpKwogIHN0YXRfcXEoKSsKICBnZW9tX2FibGluZSgpKwogIGxhYnModGl0bGUgPSAiTm9ybWFsIFFRIHBsb3QgbG9nIiwgeCA9ICJWYWxvcmVzIHRlw7NyaWNvcyIsIHkgPSAiUmVzaWR1b3MgZXN0YW5kYXJpemFkb3MiKQoKYGBgCgpMbyBxdWUgc2Ugb2JzZXJhIGVuIGVzdGUgZ3JhZmljbywgZXMgcXVlIHNpIGJpZW4gZW4gbG9zIGV4dHJlbW9zIGxhIHRlbmRlbmNpYSBlcyBhbGVqYXJzZSBkZSBsYSByZWN0YSwgbG9zIHZhbG9yZXMgZXN0YW4gbXVjaG8gbWFzIHBlZ2Fkb3MgYSBlbGxhIHF1ZSBlbiBlbCBtb2RlbG8gYW50ZXJpb3IsIGxvIG1pc21vIG9jdXJyZSBjb24gbG9zIHZhbG9yZXMgaW50ZXJtZWRpb3MgcXVlIGVzdGFuIHByYWN0aWNhbWVudGUgc29icmUgbGEgcmVjdGEuIFBvciBsbyBhbnRlcyBleHBsaWNhZG8sIGVzdGUgbW9kZWxvIGVzdGEgbWVqb3IgZGVmaW5pZG8gcXVlIGVsIGFudGVyaW9yLgoKCgpgYGB7ciwgZmlnLndpZHRoPTgsZmlnLmhlaWdodD04fQpnZ3Bsb3QobG1fcHJlY2lvTXVsdGlwbGVfbG9nX3Jlc2lkLCBhZXMoLmZpdHRlZCwgLnJlc2lkKSkgKwogIGdlb21fcG9pbnQoKSsKICBnZW9tX2hsaW5lKHlpbnRlcmNlcHQgPSAwKSArCiAgZ2VvbV9z
bW9vdGgoc2UgPSBGQUxTRSkrCiAgICBsYWJzKHRpdGxlID0gIlJlc2lkdW9zIHZlcnN1cyBlbCBtb2RlbG8gYWp1c3RhZG8gbG9nIiwgeCA9ICJ2YWxvcmVzIGZpdHRlZCIsIHkgPSAiUmVzaWR1b3MiKQoKYGBgCgpTaSBiaWVuIGVuIGVzdGUgY2FzbyBsYSBkaWZlcmVuY2lhIG5vIGVzIHRhbiBub3RvcmlhIGNvbW8gZW4gZWwgYW5hbGlzaXMgYW50ZXJpb3IsIHNlIHB1ZWRlIGFwcmVjaWFyIHF1ZSBsb3MgcmVzaWR1b3Mgbm8gZXN0YW4gZm9ybWFuZG8gdW5hIGZpZ3VyYSB0YW4gY29uY2VudHJhZGEgY29uIGVuIGVsIGNhc28gbm8gbG9nYXJpdG1pY28sIGRhbmRvIHVuYSBtZWpvcmEgYWwgbW9kZWxvIGVuIGVzdGUgY2Fzby4gUmVwYXNhbmRvIGVsIGFydGljdWxvIHNvYnJlIGxhIGFwbGljYWNpb24gZGUgbG9nYXJpdG1vcyBwYXJhIGVsIGVzdHVkaW8sIGVzdGUgbnVldm8gbW9kZWxvIGNvbiBsb2dhcml0bW9zIHBvZHJpYSBjb25zaWRlcmFyc2UgdW4gaGlicmlkbyBlbnRyZSB1biBtb2RlbG8gbG9nLW5pdmVsIHBhcmEgbGFzIGNvdmFyaWFibGVzIHF1ZSBubyBzZSBtb2RpZmljYXJvbiB5IHVuIG1vZGVsbyBsb2ctbG9nIHBhcmEgYXF1ZWxsYXMgcXVlIHNpIGxvIGZ1ZXJvbi4KCgojIyMgQ29lZmljaWVudGVzIGVzdGltYWRvcyB5IHN1cyBwLXZhbG9yZXMgYXNvY2lhZG9zCgpgYGB7ciwgZmlnLndpZHRoPTgsZmlnLmhlaWdodD04fQoKbGluZWFsX2NvZWY9IGxtX3ByZWNpb011bHRpcGxlICU+JSB0aWR5KGNvbmYuaW50PVRSVUUpCmxpbmVhbF9jb2VmX2xvZz0gbG1fcHJlY2lvTXVsdGlwbGVfbG9nICU+JSB0aWR5KGNvbmYuaW50PVRSVUUpCgpnZ3Bsb3QobGluZWFsX2NvZWYsIGFlcyh0ZXJtLCBlc3RpbWF0ZSkpKwogIGdlb21fcG9pbnQoKSsKICBnZW9tX3BvaW50cmFuZ2UoYWVzKHltaW4gPSBjb25mLmxvdywgeW1heCA9IGNvbmYuaGlnaCkpKwogIGxhYnModGl0bGUgPSAiQ29lZmljaWVudGVzIGRlIGxhIHJlZ3Jlc2lvbiBsaW5lYWwiLCB4PSIiLCB5PSJFc3RpbWFjaW9uIGUgSW50LiBDb25maWFuemEiKSArCiAgdGhlbWVfYncoKSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGU9OTApKQoKZ2dwbG90KGxpbmVhbF9jb2VmX2xvZywgYWVzKHRlcm0sIGVzdGltYXRlKSkrCiAgZ2VvbV9wb2ludCgpKwogIGdlb21fcG9pbnRyYW5nZShhZXMoeW1pbiA9IGNvbmYubG93LCB5bWF4ID0gY29uZi5oaWdoKSkrCiAgbGFicyh0aXRsZSA9ICJDb2VmaWNpZW50ZXMgZGUgbGEgcmVncmVzaW9uIGxpbmVhbCIsIHg9IiIsIHk9IkVzdGltYWNpb24gZSBJbnQuIENvbmZpYW56YSIpICsKICB0aGVtZV9idygpICsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZT05MCkpCgpgYGAKCgojIyMgUC1WYWxvciBkZSBsb3MgcmVncmVzb3JlcwoKYGBge3IsIGZpZy53aWR0aD04LGZpZy5oZWlnaHQ9OH0KZ2dwbG90KGxpbmVhbF9jb2VmLCBhZXMocmVvcmRlcih0ZXJtLCAtcC52YWx1ZSksIHAudmFs
dWUsIGZpbGw9cC52YWx1ZSkpKwogIGdlb21fYmFyKHN0YXQgPSAnaWRlbnRpdHknLCBhZXMoZmlsbD1wLnZhbHVlKSkrCiAgZ2VvbV9obGluZSh5aW50ZXJjZXB0ID0gMC4wNSkgKwogIGxhYnModGl0bGUgPSAiUC12YWxvciBkZSBsb3MgcmVncmVzb3JlcyBwYXJhIG11bHRpcGxlIiwgeD0iIiwgeT0iUC12YWxvciIpICsKICB0aGVtZV9idygpICsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZT05MCkpICsgCiAgc2NhbGVfZmlsbF9ncmFkaWVudDIoaGlnaD0nZmlyZWJyaWNrJywgbG93ID0gJ2ZvcmVzdGdyZWVuJywgbWlkPSd5ZWxsb3cyJyxtaWRwb2ludCA9IDAuNSApCgoKZ2dwbG90KGxpbmVhbF9jb2VmX2xvZywgYWVzKHJlb3JkZXIodGVybSwgLXAudmFsdWUpLCBwLnZhbHVlLCBmaWxsPXAudmFsdWUpKSsKICBnZW9tX2JhcihzdGF0ID0gJ2lkZW50aXR5JywgYWVzKGZpbGw9cC52YWx1ZSkpKwogIGdlb21faGxpbmUoeWludGVyY2VwdCA9IDAuMDUpICsKICBsYWJzKHRpdGxlID0gIlAtdmFsb3IgZGUgbG9zIHJlZ3Jlc29yZXMgcGFyYSBtdWx0aXBsZSBsb2ciLCB4PSIiLCB5PSJQLXZhbG9yIikgKwogIHRoZW1lX2J3KCkgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlPTkwKSkgKyAKICBzY2FsZV9maWxsX2dyYWRpZW50MihoaWdoPSdmaXJlYnJpY2snLCBsb3cgPSAnZm9yZXN0Z3JlZW4nLCBtaWQ9J3llbGxvdzInLG1pZHBvaW50ID0gMC41ICkKCmBgYAoKIyMjIEV2YWx1YWNpb24gZGUgYW1ib3MgbW9kZWxvcwoKYGBge3IsIGZpZy53aWR0aD04LGZpZy5oZWlnaHQ9OH0KbXVsdGlwbGUgPSBsbV9wcmVjaW9NdWx0aXBsZSAlPiUgZ2xhbmNlKCkgJT4lIHNlbGVjdChyLnNxdWFyZWQsIGFkai5yLnNxdWFyZWQsIHAudmFsdWUpIAptdWx0aXBsZV9sb2cgPSBsbV9wcmVjaW9NdWx0aXBsZV9sb2cgJT4lIGdsYW5jZSgpICU+JSBzZWxlY3Qoci5zcXVhcmVkLCBhZGouci5zcXVhcmVkLCBwLnZhbHVlKQpiaW5kX3Jvd3MobXVsdGlwbGUsIG11bHRpcGxlX2xvZykgJT4lIG11dGF0ZShtb2RlbG89IGMoJ211bHRpcGxlJywgJ211bHRpcGxlX2xvZycpKQoKYGBgCgojIyMgUmlkZ2U6IM6xPTAKIyMjIExhc3NvOiDOsT0xCiMjIyBFbGFzdGljIE5ldDogMDzOsTwxCgojIyBMYXNzbwoKYGBge3IsIGZpZy53aWR0aD04LGZpZy5oZWlnaHQ9OH0KIyBWZWN0b3IgY29uIGxvcyBzYWxhcmlvcwojcHJvZF9wcmVjaW9zID0gcHJlY2lvc190cmFpbiRwcmVjaW8KIyBNYXRyaXogY29uIGxvcyByZWdyZXNvcmVzCiNwcm9kX210eCA9IG1vZGVsLm1hdHJpeChwcmVjaW9+IGJhbmRlcmFEZXNjcmlwY2lvbiArIHN1Y3Vyc2FsVGlwbyArIG1lZGljaW9uICsgcFZlbnRhc0MgKyBtQ3VhZHJhZG9DLCBkYXRhID0gcHJlY2lvc190cmFpbikKCgpwcm9kX3ByZWNpb3MgPSBwcmVjaW9zTW9kZWxvX2xvZyRwcmVjaW8KcHJvZF9tdHggPSBtb2RlbC5tYXRyaXgocHJlY2lvfiBiYW5kZXJhRGVzY3JpcGNpb24g
KyBtZWRpY2lvbiArIHBWZW50YXNDICsgbUN1YWRyYWRvQyArIG1hcmNhLCBkYXRhID0gcHJlY2lvc01vZGVsb19sb2cpCgoKIyBNb2RlbG8gTGFzc28KbGFzc28ubW9kPWdsbW5ldCh4PXByb2RfbXR4LCAjIE1hdHJpeiBkZSByZWdyZXNvcmVzCiAgICAgICAgICAgICAgICAgeT1wcm9kX3ByZWNpb3MsICNWZWN0b3IgZGUgbGEgdmFyaWFibGUgYSBwcmVkZWNpcgogICAgICAgICAgICAgICAgIGFscGhhPTEsICMgSW5kaWNhZG9yIGRlbCB0aXBvIGRlIHJlZ3VsYXJpemFjaW9uCiAgICAgICAgICAgICAgICAgc3RhbmRhcmRpemUgPSBGKSAjIFF1ZSBlc3RhIGhhY2llbmRvIGVzdGUgcGFyYW1ldHJvPwogICAgICAgICAgICAgICAgIApsYXNzb19jb2VmID0gbGFzc28ubW9kICU+JSB0aWR5KCkKCmxhc3NvX2NvZWYKCmBgYAoKIyMjIEdyYWZpY28gZGUgY29lZmljaWVudGVzIGVuIGZ1bmNpb24gZGVsIGxhbWJkYQojIyMgR3JhZmljbyBkZSBjb2VmaWNpZW50ZXMgZW4gZnVuY2lvbiBkZSBsYSBub3JtYSBkZSBwZW5hbGl6YWNpb24KYGBge3IsIGZpZy53aWR0aD04LGZpZy5oZWlnaHQ9OH0KcGxvdChsYXNzby5tb2QsICdsYW1iZGEnKQpwbG90KGxhc3NvLm1vZCkKCmBgYAoKYGBge3IsIGZpZy53aWR0aD04LGZpZy5oZWlnaHQ9OH0KIyBHcmFmaWNvcyBwYXJhIGxvcyB2YWxvcmVzIGRlIGxhbWJkYSBlbiBnZ3Bsb3QuCgpnMT1sYXNzb19jb2VmICAlPiUgZ2dwbG90KC4sIGFlcyhsb2cobGFtYmRhKSwgZXN0aW1hdGUsIGdyb3VwPXRlcm0sIGNvbG9yPXRlcm0pKSArIGdlb21fbGluZSgpICsgdGhlbWVfYncoKSAgKyB0aGVtZShsZWdlbmQucG9zaXRpb24gPSAnbm9uZScpICsKICBsYWJzKHRpdGxlPSJMYXNzbyBjb24gSW50ZXJjZXB0byIsICB5PSJDb2VmaWNpZW50ZXMiKQoKZzI9bGFzc29fY29lZiAlPiUgZmlsdGVyKHRlcm0hPScoSW50ZXJjZXB0KScpICU+JSAKICBnZ3Bsb3QoLiwgYWVzKGxvZyhsYW1iZGEpLCBlc3RpbWF0ZSwgZ3JvdXA9dGVybSwgY29sb3I9dGVybSkpICsgZ2VvbV9saW5lKCkgKyB0aGVtZV9idygpICArIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICdub25lJykgKwogIGxhYnModGl0bGU9Ikxhc3NvIHNpbiBJbnRlcmNlcHRvIiwgeT0iQ29lZmljaWVudGVzIikKCnBsb3RfZ3JpZChnMSxnMikKCmBgYAoKIyMjIENyb3NzIFZhbGlkYXRpb24gcGFyYSBMQVNTTwoKYGBge3IsIGZpZy53aWR0aD04LGZpZy5oZWlnaHQ9OH0KbGFzc29fY3Y9Y3YuZ2xtbmV0KHg9cHJvZF9tdHgseT1wcm9kX3ByZWNpb3MsYWxwaGE9MSwgc3RhbmRhcmRpemUgPSBUKQpsYXNzb19jdgpgYGAKCmBgYHtyLCBmaWcud2lkdGg9OCxmaWcuaGVpZ2h0PTh9CnBsb3QobGFzc29fY3YpCgpgYGAKCkVsIGdyw6FmaWNvIG5vcyBtdWVzdHJhIGxhIG1lZGlhIGRlbCBNU0UgY29uIHN1IGxpbWl0ZSBzdXBlcmlvciBlIGluZmVyaW9yIHkgbGEgY2FudGlkYWQgZGUgdmFyYWlibGVzIHF1ZSBzb2JyZXZpdmVuIHBhcmEgY2FkYSB2YWxvciBkZSBsYW1iZGEu
CgoKYGBge3IsIGZpZy53aWR0aD04LGZpZy5oZWlnaHQ9OH0KIyBJbmZvcm1hY2nDs24gZGUgQ1YgZW4gZGF0YWZyYW1lIGNvbiB0aWR5Cmxhc3NvX2N2ICU+JSB0aWR5KCkKCmBgYAoKYGBge3IsIGZpZy53aWR0aD04LGZpZy5oZWlnaHQ9OH0KIyBMYW1iZGEgbWluaW1vIHkgbGFtYmRhIGEgMSBkZXN2aW8gZXN0YW5kYXIKbGFzc29fY3YgJT4lIGdsYW5jZSgpCgpgYGAKCmBgYHtyLCBmaWcud2lkdGg9OCxmaWcuaGVpZ2h0PTh9CiMgU2VsZWNjacOzbiBsYW1iZGEgw7NwdGltbwpsYXNzb19sYW1iZGFfb3B0ID0gbGFzc29fY3YkbGFtYmRhLm1pbgoKIyBFbnRyZW5hbWllbnRvIG1vZGVsbyDDs3B0aW1vCmxhc3NvX29wdCA9IGdsbW5ldCh4PXByb2RfbXR4LCAjIE1hdHJpeiBkZSByZWdyZXNvcmVzCiAgICAgICAgICAgICAgICAgeT1wcm9kX3ByZWNpb3MsICNWZWN0b3IgZGUgbGEgdmFyaWFibGUgYSBwcmVkZWNpcgogICAgICAgICAgICAgICAgIGFscGhhPTEsICMgSW5kaWNhZG9yIGRlbCB0aXBvIGRlIHJlZ3VsYXJpemFjaW9uCiAgICAgICAgICAgICAgICAgc3RhbmRhcmRpemUgPSBUUlVFLCAgIyBFc3RhbmRhcml6YW1vcwogICAgICAgICAgICAgICAgIGxhbWJkYSA9IGxhc3NvX2xhbWJkYV9vcHQpCgojIFNhbGlkYSBlc3RhbmRhcgojbGFzc29fb3B0CiMgVGlkeQpsYXNzb19vcHQgJT4lIHRpZHkoKQoKYGBgCgoKTGFzIHZhcmlhYmxlcyBleHBsaWNhbiBlbCA3NiAlIGRlbCBkZXZpYW5jZS4KCiMjIFJJREdFCiMjIyDOsT0wCgpgYGB7ciwgZmlnLndpZHRoPTgsZmlnLmhlaWdodD04fQojTW9kZWxvIHJpZGdlCnJpZGdlLm1vZD1nbG1uZXQoeD1wcm9kX210eCwgIyBNYXRyaXogZGUgcmVncmVzb3JlcwogICAgICAgICAgICAgICAgIHk9cHJvZF9wcmVjaW9zLCAjVmVjdG9yIGRlIGxhIHZhcmlhYmxlIGEgcHJlZGVjaXIKICAgICAgICAgICAgICAgICBhbHBoYT0wLCAjIEluZGljYWRvciBkZWwgdGlwbyBkZSByZWd1bGFyaXphY2lvbgogICAgICAgICAgICAgICAgIHN0YW5kYXJkaXplID0gVFJVRSkKI0NvZWZpY2llbnRlcyB0aWR5ICAgICAgICAgICAgICAgICAKcmlkZ2VfY29lZj0gcmlkZ2UubW9kICU+JSB0aWR5KCkKCnJpZGdlX2NvZWYgCgpgYGAKCgpgYGB7ciwgZmlnLndpZHRoPTgsZmlnLmhlaWdodD04fQoKcGxvdChyaWRnZS5tb2QsICdsYW1iZGEnKQpwbG90KHJpZGdlLm1vZCkKCgpgYGAKCgpgYGB7ciwgZmlnLndpZHRoPTgsZmlnLmhlaWdodD04fQoKZzE9cmlkZ2VfY29lZiAgJT4lIGdncGxvdCguLCBhZXMobG9nKGxhbWJkYSksIGVzdGltYXRlLCBncm91cD10ZXJtLCBjb2xvcj10ZXJtKSkgKyBnZW9tX2xpbmUoKSArIHRoZW1lX2J3KCkgICsgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gJ25vbmUnKSArCiAgbGFicyh0aXRsZT0iUmlkZ2UgY29uIEludGVyY2VwdG8iLCAgeT0iQ29lZmljaWVudGVzIikKCmcyPXJpZGdlX2NvZWYgJT4lIGZpbHRlcih0ZXJtIT0nKEludGVyY2VwdCknKSAlPiUgCiAgZ2dwbG90KC4sIGFl
cyhsb2cobGFtYmRhKSwgZXN0aW1hdGUsIGdyb3VwPXRlcm0sIGNvbG9yPXRlcm0pKSArIGdlb21fbGluZSgpICsgdGhlbWVfYncoKSAgKyB0aGVtZShsZWdlbmQucG9zaXRpb24gPSAnbm9uZScpICsKICBsYWJzKHRpdGxlPSJSaWRnZSBzaW4gSW50ZXJjZXB0byIsIHk9IkNvZWZpY2llbnRlcyIpCgpwbG90X2dyaWQoZzEsZzIpCmBgYAoKIyMjIEVsZWNjacOzbiBsYW1iZGEgw7NwdGltbwoKYGBge3IsIGZpZy53aWR0aD04LGZpZy5oZWlnaHQ9OH0KcmlkZ2VfY3Y9Y3YuZ2xtbmV0KHg9cHJvZF9tdHgseT1wcm9kX3ByZWNpb3MsYWxwaGE9MCwgc3RhbmRhcmRpemUgPSBUKQoKYGBgCgoKYGBge3IsIGZpZy53aWR0aD04LGZpZy5oZWlnaHQ9OH0KcGxvdChyaWRnZV9jdikKCmBgYAoKCiMjIyBTZWxlY2Npb24gbGFtYmRhIMOzcHRpbW8gcGFyYSBjcmVhciBlbCBtb2RlbG8gZmluYWwKCmBgYHtyLCBmaWcud2lkdGg9OCxmaWcuaGVpZ2h0PTh9CiMgU2VsZWNjacOzbiBsYW1iZGEgw7NwdGltbwpyaWRnZV9sYW1iZGFfb3B0ID0gcmlkZ2VfY3YkbGFtYmRhLm1pbgoKIyBFbnRyZW5hbWllbnRvIG1vZGVsbyDDs3B0aW1vCnJpZGdlX29wdCA9IGdsbW5ldCh4PXByb2RfbXR4LCAjIE1hdHJpeiBkZSByZWdyZXNvcmVzCiAgICAgICAgICAgICAgICAgeT1wcm9kX3ByZWNpb3MsICNWZWN0b3IgZGUgbGEgdmFyaWFibGUgYSBwcmVkZWNpcgogICAgICAgICAgICAgICAgIGFscGhhPTAsICMgSW5kaWNhZG9yIGRlbCB0aXBvIGRlIHJlZ3VsYXJpemFjaW9uCiAgICAgICAgICAgICAgICAgc3RhbmRhcmRpemUgPSBUUlVFLCAgIyBFc3RhbmRhcml6YW1vcwogICAgICAgICAgICAgICAgIGxhbWJkYSA9IHJpZGdlX2xhbWJkYV9vcHQpCgojIFNhbGlkYSBlc3RhbmRhcgojcmlkZ2Vfb3B0CnJpZGdlX29wdCAlPiUgdGlkeSgpCgpgYGAKCiMjIENvbXByYWNpb24gZW50cmUgTGFzc28geSBSaWRnZQoKYGBge3IsIGZpZy53aWR0aD04LGZpZy5oZWlnaHQ9OH0KCnJpZGdlX2RldiA9IHJpZGdlX2NvZWYgJT4lIHNlbGVjdChsYW1iZGEsIGRldi5yYXRpbykgJT4lIGRpc3RpbmN0KCkgJT4lCiAgZ2dwbG90KC4sIGFlcyhsb2cobGFtYmRhKSwgZGV2LnJhdGlvKSkgKwogIGdlb21fcG9pbnQoKSArCiAgZ2VvbV9saW5lKCkgKwogIGdlb21fdmxpbmUoeGludGVyY2VwdCA9IGxvZyhyaWRnZV9sYW1iZGFfb3B0KSwgY29sb3I9J3N0ZWVsYmx1ZScsIHNpemU9MS41KSArCiAgbGFicyh0aXRsZT0nUmlkZ2U6IERldmlhbmNlJykgKwogIHRoZW1lX2J3KCkgCgpsYXNzb19kZXYgPSBsYXNzb19jb2VmICU+JSBzZWxlY3QobGFtYmRhLCBkZXYucmF0aW8pICU+JSBkaXN0aW5jdCgpICU+JQogIGdncGxvdCguLCBhZXMobG9nKGxhbWJkYSksIGRldi5yYXRpbykpICsKICBnZW9tX3BvaW50KCkgKwogIGdlb21fbGluZSgpICsKICBnZW9tX3ZsaW5lKHhpbnRlcmNlcHQgPSBsb2cobGFzc29fbGFtYmRhX29wdCksIGNvbG9yPSdmaXJlYnJpY2snLCBzaXplPTEuNSkgKwogIGxhYnMo
dGl0bGU9J0xhc3NvOiBEZXZpYW5jZScpICsKICB0aGVtZV9idygpCgpwbG90X2dyaWQocmlkZ2VfZGV2LCBsYXNzb19kZXYpCgoKYGBgCgpDb21wcmFjaW9uIGRlIGxhIHJlbGFjacOzbiBlbnRyZSBlbCBwb3JjZW50YWplIGRlIGRldmlhbmNlIGV4cGxpY2FkYSB5IGxhbWJkYSBwYXJhIGxvcyB0cmVzIHRpcG9zIGRlIG1vZGVsb3MgcXVlIHJlYWxpemFtb3MKCgoKIyMgQ29tcHJhY2lvbiBNb2RlbG9zCiMjIyByaWRnZV9vcHQgLCBsYXNzb19vcHQsIGxtX3ByZWNpb011bHRpcGxlX2xvZwojIyMgcHJlY2lvc190cmFpbiAtIHByZWNpb3NfdGVzdAoKYGBge3IsIGZpZy53aWR0aD04LGZpZy5oZWlnaHQ9OH0KZXZhbF9yZXN1bHRzIDwtIGZ1bmN0aW9uKHRydWUsIHByZWRpY3RlZCwgZGYpIHsKICBTU0UgPC0gc3VtKChwcmVkaWN0ZWQgLSB0cnVlKV4yKQogIFNTVCA8LSBzdW0oKHRydWUgLSBtZWFuKHRydWUpKV4yKQogIFJfc3F1YXJlIDwtIDEgLSBTU0UgLyBTU1QKICBSTVNFID0gc3FydChTU0UvbnJvdyhkZikpCgogIAogICMgTW9kZWwgcGVyZm9ybWFuY2UgbWV0cmljcwpkYXRhLmZyYW1lKAogIFJNU0UgPSBSTVNFLAogIFJzcXVhcmUgPSBSX3NxdWFyZQopCiAgCn0KCmBgYAoKCmBgYHtyLCBmaWcud2lkdGg9OCxmaWcuaGVpZ2h0PTh9CiMgUHJlZGljY2lvbiB5IGV2YWx1YWNpb24gZW4gdHJhaW4gZGF0YSBMYXNzbwpwcmVkaWN0aW9uc190cmFpbiA8LSBwcmVkaWN0KGxhc3NvX29wdCwgcyA9IGxhc3NvX2xhbWJkYV9vcHQsIG5ld3ggPSBwcm9kX210eCkKZXZhbF9yZXN1bHRzKHByZWNpb3NNb2RlbG8kcHJlY2lvLCBwcmVkaWN0aW9uc190cmFpbiwgcHJlY2lvc01vZGVsbykKCgpgYGAKCgpgYGB7ciwgZmlnLndpZHRoPTgsZmlnLmhlaWdodD04fQojIFByZWRpY3Rpb24gYW5kIGV2YWx1YXRpb24gb24gdHJhaW4gZGF0YSBSaWRnZQpwcmVkaWN0aW9uc190cmFpbiA8LSBwcmVkaWN0KHJpZGdlX29wdCwgcyA9IHJpZGdlX2xhbWJkYV9vcHQsIG5ld3ggPSBwcm9kX210eCkKZXZhbF9yZXN1bHRzKHByZWNpb3NNb2RlbG8kcHJlY2lvLCBwcmVkaWN0aW9uc190cmFpbiwgcHJlY2lvc01vZGVsbykKCmBgYAoKYGBge3IsIGZpZy53aWR0aD04LGZpZy5oZWlnaHQ9OH0KCgpgYGAKCg==